From 350dafc7b199bd01f3431ebd6071c015807955d9 Mon Sep 17 00:00:00 2001 From: gcca Date: Wed, 22 Aug 2018 13:43:19 -0500 Subject: [PATCH 01/30] [replace-function] API definition --- CMakeLists.txt | 1 + include/gdf/cffi/functions.h | 14 ++++++- src/replace.cu | 25 ++++++++++++ src/tests/CMakeLists.txt | 1 + src/tests/replace/CMakeLists.txt | 1 + src/tests/replace/replace-test.cu | 65 +++++++++++++++++++++++++++++++ 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 src/replace.cu create mode 100644 src/tests/replace/CMakeLists.txt create mode 100644 src/tests/replace/replace-test.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index dbed67c0..24f02916 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,7 @@ cuda_add_library(gdf SHARED src/sqls_ops.cu src/streamcompactionops.cu src/unaryops.cu + src/replace.cu #src/windowedops.cu ) diff --git a/include/gdf/cffi/functions.h b/include/gdf/cffi/functions.h index 8252419c..62abd7d7 100644 --- a/include/gdf/cffi/functions.h +++ b/include/gdf/cffi/functions.h @@ -491,7 +491,7 @@ gdf_error gpu_hash_columns(gdf_column ** columns_to_hash, int num_columns, gdf_c gdf_error get_column_byte_width(gdf_column * col, int * width); -/* +/* Multi-Column SQL ops: WHERE (Filtering) ORDER-BY @@ -559,3 +559,15 @@ gdf_error gdf_group_by_count(int ncols, // # columns //(multi-gather based on indices, which are needed anyway) gdf_column* out_col_agg, //aggregation result gdf_context* ctxt); //struct with additional info: bool is_sorted, flag_sort_or_hash, bool flag_count_distinct + +/* replace */ + +/// \brief Replace `to_replace` data of `column` with `values` +/// \param[in/out] column data +/// \param[in] to_replace contains values of column that will be replaced +/// \param[in] values contains the replacement values +/// +/// Note that `to_replace` and `values` are related by the index +gdf_error gdf_replace(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values); diff --git 
a/src/replace.cu b/src/replace.cu new file mode 100644 index 00000000..18c745c0 --- /dev/null +++ b/src/replace.cu @@ -0,0 +1,25 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +gdf_error +gdf_replace(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values) { + return GDF_CUDA_ERROR; +} diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index ef9dceb2..b14ed1aa 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -43,5 +43,6 @@ add_subdirectory(datetime) add_subdirectory(hashing) add_subdirectory(join) add_subdirectory(sqls) +add_subdirectory(replace) message(STATUS "******** Tests are ready ********") diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt new file mode 100644 index 00000000..def5158d --- /dev/null +++ b/src/tests/replace/CMakeLists.txt @@ -0,0 +1 @@ +configure_test(replace-test replace-test.cu) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu new file mode 100644 index 00000000..fb496b4f --- /dev/null +++ b/src/tests/replace/replace-test.cu @@ -0,0 +1,65 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include + +#include + +static gdf_column +CreateGdfColumn(const std::initializer_list list) { + const std::vector host_data(list); + thrust::device_vector device_data(host_data); + thrust::device_vector device_valid(1, 0); + + return gdf_column{ + .data = thrust::raw_pointer_cast(device_data.data()), + .valid = thrust::raw_pointer_cast(device_valid.data()), + .size = 0, + .dtype = GDF_INT64, + .null_count = 0, + .dtype_info = {}, + }; +} + +TEST(ReplaceTest, API) { + gdf_column column = CreateGdfColumn({1, 2, 3, 4, 5, 6, 7, 8}); + + gdf_column to_replace = CreateGdfColumn({2, 4, 6, 8}); + gdf_column values = CreateGdfColumn({0, 2, 4, 6}); + + const gdf_error status = gdf_replace(&column, &to_replace, &values); + + EXPECT_EQ(GDF_SUCCESS, status); + + const thrust::device_ptr data_ptr( + static_cast(column.data)); + + constexpr std::ptrdiff_t ptrdiff = 8; + + const thrust::device_vector device_data(data_ptr, + data_ptr + ptrdiff); + + EXPECT_EQ(0, device_data[1]); + EXPECT_EQ(2, device_data[3]); + EXPECT_EQ(4, device_data[5]); + EXPECT_EQ(6, device_data[7]); +} From 5a8b93a1d002eb695beac11b4f6891fba06136f9 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 12:27:52 -0500 Subject: [PATCH 02/30] [replace-function] Add first implementation --- src/replace.cu | 94 ++++++++++++++++++++++++++++++- src/tests/replace/replace-test.cu | 52 +++++++++-------- 2 files changed, 120 insertions(+), 26 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 18c745c0..162e35f4 100644 --- a/src/replace.cu +++ 
b/src/replace.cu @@ -15,11 +15,103 @@ * limitations under the License. */ +#include +#include +#include + #include +namespace { + +template +struct gdf_dtype_traits {}; + +#define DTYPE_FACTORY(DTYPE, T) \ + template <> \ + struct gdf_dtype_traits { \ + typedef T value_type; \ + } + +DTYPE_FACTORY(INT8, std::int8_t); +DTYPE_FACTORY(INT16, std::int16_t); +DTYPE_FACTORY(INT32, std::int32_t); +DTYPE_FACTORY(INT64, std::int64_t); +DTYPE_FACTORY(FLOAT32, float); +DTYPE_FACTORY(FLOAT64, double); +DTYPE_FACTORY(DATE32, std::int32_t); +DTYPE_FACTORY(DATE64, std::int64_t); +DTYPE_FACTORY(TIMESTAMP, std::int64_t); + +#undef DTYPE_FACTORY + +template +static inline void +Replace(T *const data, + const std::size_t data_size, + T *const to_replace, + T *const values, + const std::size_t replacement_size) { + thrust::device_ptr begin(data); + thrust::device_ptr end = begin + static_cast(data_size); + + thrust::device_ptr from(to_replace); + thrust::device_ptr to(values); + + for (std::size_t i = 0; i < replacement_size; i++) { + thrust::replace(begin, end, from[i], to[i]); + } +} + +static inline bool +NotEqualReplacementSize(const gdf_column *to_replace, + const gdf_column *values) { + return to_replace->size != values->size; +} + +static inline bool +NotSameDType(const gdf_column *column, + const gdf_column *to_replace, + const gdf_column *values) { + return column->dtype != to_replace->dtype + || to_replace->dtype != values->dtype; +} + +} // namespace + gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - return GDF_CUDA_ERROR; + if (NotEqualReplacementSize(to_replace, values)) { return GDF_CUDA_ERROR; } + + if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } + + switch (column->dtype) { +#define WHEN(DTYPE) \ + case GDF_##DTYPE: { \ + using value_type = gdf_dtype_traits::value_type; \ + Replace(static_cast(column->data), \ + column->size, \ + static_cast(to_replace->data), \ + static_cast(values->data), 
\ + values->size); \ + } break + + WHEN(INT8); + WHEN(INT16); + WHEN(INT32); + WHEN(INT64); + WHEN(FLOAT32); + WHEN(FLOAT64); + WHEN(DATE32); + WHEN(DATE64); + WHEN(TIMESTAMP); + +#undef WHEN + + case GDF_invalid: + default: return GDF_CUDA_ERROR; + } + + return GDF_SUCCESS; } diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index fb496b4f..803a737e 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -17,23 +17,24 @@ #include -#include - -#include #include #include -static gdf_column -CreateGdfColumn(const std::initializer_list list) { - const std::vector host_data(list); - thrust::device_vector device_data(host_data); - thrust::device_vector device_valid(1, 0); +template +static inline thrust::device_vector +MakeDeviceVector(const std::initializer_list list) { + const std::vector column_data(list); + thrust::device_vector device_data(column_data); + return device_data; +} +static inline gdf_column +MakeGdfColumn(thrust::device_vector &device_vector) { return gdf_column{ - .data = thrust::raw_pointer_cast(device_data.data()), - .valid = thrust::raw_pointer_cast(device_valid.data()), - .size = 0, + .data = thrust::raw_pointer_cast(device_vector.data()), + .valid = nullptr, + .size = device_vector.size(), .dtype = GDF_INT64, .null_count = 0, .dtype_info = {}, @@ -41,25 +42,26 @@ CreateGdfColumn(const std::initializer_list list) { } TEST(ReplaceTest, API) { - gdf_column column = CreateGdfColumn({1, 2, 3, 4, 5, 6, 7, 8}); + thrust::device_vector device_data = + MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); + gdf_column column = MakeGdfColumn(device_data); + + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); - gdf_column to_replace = CreateGdfColumn({2, 4, 6, 8}); - gdf_column values = CreateGdfColumn({0, 2, 4, 6}); + gdf_column to_replace = MakeGdfColumn(to_replace_data); + gdf_column values = 
MakeGdfColumn(values_data); const gdf_error status = gdf_replace(&column, &to_replace, &values); EXPECT_EQ(GDF_SUCCESS, status); - const thrust::device_ptr data_ptr( + thrust::device_ptr results( static_cast(column.data)); - - constexpr std::ptrdiff_t ptrdiff = 8; - - const thrust::device_vector device_data(data_ptr, - data_ptr + ptrdiff); - - EXPECT_EQ(0, device_data[1]); - EXPECT_EQ(2, device_data[3]); - EXPECT_EQ(4, device_data[5]); - EXPECT_EQ(6, device_data[7]); + EXPECT_EQ(0, results[1]); + EXPECT_EQ(2, results[3]); + EXPECT_EQ(4, results[5]); + EXPECT_EQ(6, results[7]); } From adf537d506721847e197861a34dfb97ea4963d50 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 18:10:59 -0500 Subject: [PATCH 03/30] [replace-function] Add replacement by lower bound --- src/replace.cu | 54 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 162e35f4..3c8a6299 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,9 +15,10 @@ * limitations under the License. 
*/ +#include #include #include -#include +#include #include @@ -44,22 +45,51 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY +template +class ReplaceFunctor { +public: + ReplaceFunctor(T *const raw) : data(raw) {} + + __host__ __device__ void + operator()(thrust::tuple tuple) { + const T i = thrust::get<0>(tuple); + + if (data[i] == thrust::get<1>(tuple)) { + data[i] = thrust::get<2>(tuple); + } + } + + thrust::device_ptr data; +}; + template static inline void Replace(T *const data, const std::size_t data_size, - T *const to_replace, - T *const values, + const T *const to_replace, + const T *const values, const std::size_t replacement_size) { - thrust::device_ptr begin(data); - thrust::device_ptr end = begin + static_cast(data_size); - - thrust::device_ptr from(to_replace); - thrust::device_ptr to(values); - - for (std::size_t i = 0; i < replacement_size; i++) { - thrust::replace(begin, end, from[i], to[i]); - } + const thrust::device_ptr data_begin(data); + const thrust::device_ptr data_end = + data_begin + static_cast(data_size); + + const thrust::device_ptr from_begin(to_replace); + const thrust::device_ptr from_end = + from_begin + static_cast(replacement_size); + + const thrust::device_ptr to_begin(values); + const thrust::device_ptr to_end = + to_begin + static_cast(replacement_size); + + thrust::device_vector lower_bounds(replacement_size); + thrust::lower_bound( + data_begin, data_end, from_begin, from_end, lower_bounds.begin()); + + thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple( + lower_bounds.cbegin(), from_begin, to_begin)), + thrust::make_zip_iterator(thrust::make_tuple( + lower_bounds.cend(), from_end, to_end)), + ReplaceFunctor(data)); } static inline bool From 3214b08f0434d305f41a1c7d7408518744261180 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 19:10:15 -0500 Subject: [PATCH 04/30] [replace-function] Update typed unit test --- src/tests/replace/replace-test.cu | 48 ++++++++++++++++++++++++------- 1 file 
changed, 37 insertions(+), 11 deletions(-) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 803a737e..eb93a6a1 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -21,6 +21,24 @@ #include +template +struct TypeTraits {}; + +#define TYPE_FACTORY(U, D) \ + template <> \ + struct TypeTraits { \ + static constexpr gdf_dtype dtype = GDF_##D; \ + } + +TYPE_FACTORY(std::int8_t, INT8); +TYPE_FACTORY(std::int16_t, INT16); +TYPE_FACTORY(std::int32_t, INT32); +TYPE_FACTORY(std::int64_t, INT64); +TYPE_FACTORY(float, FLOAT32); +TYPE_FACTORY(double, FLOAT64); + +#undef TYPE_FACTORY + template static inline thrust::device_vector MakeDeviceVector(const std::initializer_list list) { @@ -29,27 +47,35 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } +template static inline gdf_column -MakeGdfColumn(thrust::device_vector &device_vector) { +MakeGdfColumn(thrust::device_vector &device_vector) { return gdf_column{ .data = thrust::raw_pointer_cast(device_vector.data()), .valid = nullptr, .size = device_vector.size(), - .dtype = GDF_INT64, + .dtype = TypeTraits::dtype, .null_count = 0, .dtype_info = {}, }; } -TEST(ReplaceTest, API) { - thrust::device_vector device_data = - MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); +template +class ReplaceTest : public testing::Test {}; + +using Types = testing:: + Types; +TYPED_TEST_CASE(ReplaceTest, Types); + +TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { + thrust::device_vector device_data = + MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); gdf_column column = MakeGdfColumn(device_data); - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = - MakeDeviceVector({0, 2, 4, 6}); + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); gdf_column to_replace = MakeGdfColumn(to_replace_data); gdf_column values 
= MakeGdfColumn(values_data); @@ -58,8 +84,8 @@ TEST(ReplaceTest, API) { EXPECT_EQ(GDF_SUCCESS, status); - thrust::device_ptr results( - static_cast(column.data)); + thrust::device_ptr results( + static_cast(column.data)); EXPECT_EQ(0, results[1]); EXPECT_EQ(2, results[3]); EXPECT_EQ(4, results[5]); From 39350489078603caa87ecdc3d64866595de0c7ba Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 08:35:34 -0500 Subject: [PATCH 05/30] [replace-function] Update to replace on unordered column --- src/replace.cu | 65 ++++++++++++------------------- src/tests/replace/replace-test.cu | 26 +++++++++++++ 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 3c8a6299..9583f7d0 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,10 +15,11 @@ * limitations under the License. */ -#include #include #include +#include #include +#include #include @@ -45,51 +46,33 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY -template -class ReplaceFunctor { -public: - ReplaceFunctor(T *const raw) : data(raw) {} - - __host__ __device__ void - operator()(thrust::tuple tuple) { - const T i = thrust::get<0>(tuple); - - if (data[i] == thrust::get<1>(tuple)) { - data[i] = thrust::get<2>(tuple); - } - } - - thrust::device_ptr data; -}; - template static inline void -Replace(T *const data, - const std::size_t data_size, - const T *const to_replace, - const T *const values, - const std::size_t replacement_size) { - const thrust::device_ptr data_begin(data); - const thrust::device_ptr data_end = - data_begin + static_cast(data_size); +Replace(T *const data, + const std::ptrdiff_t data_ptrdiff, + const T *const to_replace, + const T *const values, + const std::ptrdiff_t replacement_ptrdiff) { + const thrust::device_ptr data_begin(data); + const thrust::device_ptr data_end = data_begin + data_ptrdiff; const thrust::device_ptr from_begin(to_replace); const thrust::device_ptr from_end = - from_begin + static_cast(replacement_size); + 
from_begin + replacement_ptrdiff; const thrust::device_ptr to_begin(values); - const thrust::device_ptr to_end = - to_begin + static_cast(replacement_size); - - thrust::device_vector lower_bounds(replacement_size); - thrust::lower_bound( - data_begin, data_end, from_begin, from_end, lower_bounds.begin()); - - thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple( - lower_bounds.cbegin(), from_begin, to_begin)), - thrust::make_zip_iterator(thrust::make_tuple( - lower_bounds.cend(), from_end, to_end)), - ReplaceFunctor(data)); + const thrust::device_ptr to_end = to_begin + replacement_ptrdiff; + + thrust::for_each( + thrust::device, + thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), + thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), + [=] __device__(thrust::tuple tuple) { + const T from = thrust::get<0>(tuple); + const T to = thrust::get<1>(tuple); + + thrust::replace(thrust::device, data_begin, data_end, from, to); + }); } static inline bool @@ -121,10 +104,10 @@ gdf_replace(gdf_column * column, case GDF_##DTYPE: { \ using value_type = gdf_dtype_traits::value_type; \ Replace(static_cast(column->data), \ - column->size, \ + static_cast(column->size), \ static_cast(to_replace->data), \ static_cast(values->data), \ - values->size); \ + static_cast(values->size)); \ } break WHEN(INT8); diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index eb93a6a1..e9f785ec 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -65,6 +65,7 @@ class ReplaceTest : public testing::Test {}; using Types = testing:: Types; + TYPED_TEST_CASE(ReplaceTest, Types); TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { @@ -91,3 +92,28 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { EXPECT_EQ(4, results[5]); EXPECT_EQ(6, results[7]); } + +TYPED_TEST(ReplaceTest, Unordered) { + thrust::device_vector device_data = + MakeDeviceVector({7, 5, 6, 3, 1, 2, 8, 4}); + gdf_column column = 
MakeGdfColumn(device_data); + + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); + + gdf_column to_replace = MakeGdfColumn(to_replace_data); + gdf_column values = MakeGdfColumn(values_data); + + const gdf_error status = gdf_replace(&column, &to_replace, &values); + + EXPECT_EQ(GDF_SUCCESS, status); + + thrust::device_ptr results( + static_cast(column.data)); + EXPECT_EQ(4, results[2]); + EXPECT_EQ(0, results[5]); + EXPECT_EQ(6, results[6]); + EXPECT_EQ(2, results[7]); +} From f5afc3137b8bc1da66a2242a2f70c53f1afe6f83 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 12:12:28 -0500 Subject: [PATCH 06/30] [replace-function] Update class replace functor --- src/replace.cu | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 9583f7d0..86c6f952 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -46,6 +46,25 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY +template +class ReplaceFunctor { +public: + ReplaceFunctor(T *const data, const std::ptrdiff_t data_ptrdiff) + : data_begin(data), data_end(data_begin + data_ptrdiff) {} + + void __device__ + operator()(thrust::tuple tuple) { + const T from = thrust::get<0>(tuple); + const T to = thrust::get<1>(tuple); + + thrust::replace(thrust::device, data_begin, data_end, from, to); + } + +private: + const thrust::device_ptr data_begin; + const thrust::device_ptr data_end; +}; + template static inline void Replace(T *const data, @@ -53,9 +72,6 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - const thrust::device_ptr data_begin(data); - const thrust::device_ptr data_end = data_begin + data_ptrdiff; - const thrust::device_ptr from_begin(to_replace); const thrust::device_ptr from_end = from_begin + replacement_ptrdiff; @@ -67,12 +83,7 @@ Replace(T *const data, 
thrust::device, thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), - [=] __device__(thrust::tuple tuple) { - const T from = thrust::get<0>(tuple); - const T to = thrust::get<1>(tuple); - - thrust::replace(thrust::device, data_begin, data_end, from, to); - }); + ReplaceFunctor(data, data_ptrdiff)); } static inline bool From ef3f3825659fcf6ae90d47ddcd4880d6f97503c5 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 16:37:27 -0500 Subject: [PATCH 07/30] [replace-function] Common test fixtures --- src/tests/replace/replace-test.cu | 68 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index e9f785ec..7dbcf75e 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -61,32 +61,45 @@ MakeGdfColumn(thrust::device_vector &device_vector) { } template -class ReplaceTest : public testing::Test {}; +class ReplaceTest : public testing::Test { +protected: + thrust::device_ptr + test(const std::initializer_list &data_list, + const std::initializer_list &to_replace_list, + const std::initializer_list &values_list) { + device_data = MakeDeviceVector(data_list); + to_replace_data = MakeDeviceVector(to_replace_list); + values_data = MakeDeviceVector(values_list); -using Types = testing:: - Types; + column = MakeGdfColumn(device_data); + to_replace = MakeGdfColumn(to_replace_data); + values = MakeGdfColumn(values_data); -TYPED_TEST_CASE(ReplaceTest, Types); + const gdf_error status = gdf_replace(&column, &to_replace, &values); -TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { - thrust::device_vector device_data = - MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); - gdf_column column = MakeGdfColumn(device_data); + EXPECT_EQ(GDF_SUCCESS, status); - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = 
- MakeDeviceVector({0, 2, 4, 6}); + return thrust::device_ptr(static_cast(column.data)); + } - gdf_column to_replace = MakeGdfColumn(to_replace_data); - gdf_column values = MakeGdfColumn(values_data); + thrust::device_vector device_data; + thrust::device_vector to_replace_data; + thrust::device_vector values_data; - const gdf_error status = gdf_replace(&column, &to_replace, &values); + gdf_column column; + gdf_column to_replace; + gdf_column values; +}; - EXPECT_EQ(GDF_SUCCESS, status); +using Types = testing:: + Types; + +TYPED_TEST_CASE(ReplaceTest, Types); + +TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { + thrust::device_ptr results = + this->test({1, 2, 3, 4, 5, 6, 7, 8}, {2, 4, 6, 8}, {0, 2, 4, 6}); - thrust::device_ptr results( - static_cast(column.data)); EXPECT_EQ(0, results[1]); EXPECT_EQ(2, results[3]); EXPECT_EQ(4, results[5]); @@ -94,24 +107,9 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { } TYPED_TEST(ReplaceTest, Unordered) { - thrust::device_vector device_data = - MakeDeviceVector({7, 5, 6, 3, 1, 2, 8, 4}); - gdf_column column = MakeGdfColumn(device_data); - - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = - MakeDeviceVector({0, 2, 4, 6}); - - gdf_column to_replace = MakeGdfColumn(to_replace_data); - gdf_column values = MakeGdfColumn(values_data); - - const gdf_error status = gdf_replace(&column, &to_replace, &values); - - EXPECT_EQ(GDF_SUCCESS, status); + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 4, 6, 8}, {0, 2, 4, 6}); - thrust::device_ptr results( - static_cast(column.data)); EXPECT_EQ(4, results[2]); EXPECT_EQ(0, results[5]); EXPECT_EQ(6, results[6]); From 003bd9dde76b9ee3dcfc7213af2760f6a9f67793 Mon Sep 17 00:00:00 2001 From: William Malpica Date: Fri, 31 Aug 2018 15:20:29 -0500 Subject: [PATCH 08/30] created larger scale test for replace funtion --- src/replace.cu | 2 +- src/tests/replace/replace-test.cu | 59 +++++++++++++++++++++++++++++++ 2 
files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/replace.cu b/src/replace.cu index 86c6f952..e59d5e7b 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -106,7 +106,7 @@ gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - if (NotEqualReplacementSize(to_replace, values)) { return GDF_CUDA_ERROR; } + if (NotEqualReplacementSize(to_replace, values)) { return GDF_COLUMN_SIZE_MISMATCH; } if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 7dbcf75e..3af1232d 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -21,6 +21,8 @@ #include +#include + template struct TypeTraits {}; @@ -115,3 +117,60 @@ TYPED_TEST(ReplaceTest, Unordered) { EXPECT_EQ(6, results[6]); EXPECT_EQ(2, results[7]); } + + + + +TEST(LargeScaleReplaceTest, LargeScaleReplaceTest) { + + { + const int DATA_SIZE = 1000000; + const int REPLACE_SIZE = 10000; + + srand((unsigned)time(NULL)); + + std::vector column_data(DATA_SIZE); + for (int i = 0; i < DATA_SIZE; i++){ + column_data[i] = rand() % (2*REPLACE_SIZE); + } + + std::vector from(DATA_SIZE); + std::vector to(DATA_SIZE); + int count = 0; + for (int i = 0; i < 7; i++){ + for (int j = 0; j < REPLACE_SIZE; j += 7){ + from[i+j] = count; + count++; + to[i+j] = count; + } + } + + thrust::device_vector device_data(column_data); + gdf_column data_gdf = MakeGdfColumn(device_data); + thrust::device_vector device_from(from); + gdf_column from_gdf = MakeGdfColumn(device_from); + thrust::device_vector device_to(to); + gdf_column to_gdf = MakeGdfColumn(device_to); + + const gdf_error status = gdf_replace(&data_gdf, &from_gdf, &to_gdf); + + EXPECT_EQ(GDF_SUCCESS, status); + + std::vector replaced_data(DATA_SIZE); + thrust::copy(device_data.begin(), device_data.end(), replaced_data.begin()); + + count = 0; + for (int i = 0; i < DATA_SIZE; i++){ + if 
(column_data[i] < REPLACE_SIZE){ + EXPECT_EQ(column_data[i] + 1, replaced_data[i]); + if (column_data[i] + 1 != replaced_data[i]){ + std::cout<<"failed at "< 20){ + break; + } + } + } + } + } +} From 5553c676031e5e181d7c66f386959d4c0ad5892a Mon Sep 17 00:00:00 2001 From: gcca Date: Mon, 3 Sep 2018 16:47:05 -0500 Subject: [PATCH 09/30] [replace-function] Replace kernel --- src/replace.cu | 93 +++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index e59d5e7b..1c4fd333 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,6 +15,8 @@ * limitations under the License. */ +#include + #include #include #include @@ -28,10 +30,10 @@ namespace { template struct gdf_dtype_traits {}; -#define DTYPE_FACTORY(DTYPE, T) \ - template <> \ - struct gdf_dtype_traits { \ - typedef T value_type; \ +#define DTYPE_FACTORY(DTYPE, T) \ + template <> \ + struct gdf_dtype_traits { \ + typedef T value_type; \ } DTYPE_FACTORY(INT8, std::int8_t); @@ -47,23 +49,27 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY template -class ReplaceFunctor { -public: - ReplaceFunctor(T *const data, const std::ptrdiff_t data_ptrdiff) - : data_begin(data), data_end(data_begin + data_ptrdiff) {} - - void __device__ - operator()(thrust::tuple tuple) { - const T from = thrust::get<0>(tuple); - const T to = thrust::get<1>(tuple); - - thrust::replace(thrust::device, data_begin, data_end, from, to); +__global__ void +replace_kernel(T *const data, + const std::ptrdiff_t data_ptrdiff, + const T *const to_replace, + const T *const values, + const std::ptrdiff_t replacement_ptrdiff) { + for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; + i < data_ptrdiff; + i += blockDim.x * gridDim.x) { + const thrust::device_ptr begin(to_replace); + const thrust::device_ptr end(begin + replacement_ptrdiff); + + const thrust::device_ptr found = // TODO: find by map kernel + thrust::find(thrust::device, begin, end, 
data[i]); + + if (found != end) { + std::size_t j = thrust::distance(begin, found); + data[i] = values[j]; + } } - -private: - const thrust::device_ptr data_begin; - const thrust::device_ptr data_end; -}; +} template static inline void @@ -72,18 +78,19 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - const thrust::device_ptr from_begin(to_replace); - const thrust::device_ptr from_end = - from_begin + replacement_ptrdiff; - - const thrust::device_ptr to_begin(values); - const thrust::device_ptr to_end = to_begin + replacement_ptrdiff; - - thrust::for_each( - thrust::device, - thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), - thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), - ReplaceFunctor(data, data_ptrdiff)); + int multiprocessors; + // TODO: device selection + cudaDeviceGetAttribute(&multiprocessors, cudaDevAttrMultiProcessorCount, 0); + + std::size_t blocks = std::ceil(data_ptrdiff / (multiprocessors * 256.)); + + replace_kernel + <<>>( // TODO: calc blocks and threads + data, + data_ptrdiff, + to_replace, + values, + replacement_ptrdiff); } static inline bool @@ -106,19 +113,21 @@ gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - if (NotEqualReplacementSize(to_replace, values)) { return GDF_COLUMN_SIZE_MISMATCH; } + if (NotEqualReplacementSize(to_replace, values)) { + return GDF_COLUMN_SIZE_MISMATCH; + } if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } switch (column->dtype) { -#define WHEN(DTYPE) \ - case GDF_##DTYPE: { \ - using value_type = gdf_dtype_traits::value_type; \ - Replace(static_cast(column->data), \ - static_cast(column->size), \ - static_cast(to_replace->data), \ - static_cast(values->data), \ - static_cast(values->size)); \ +#define WHEN(DTYPE) \ + case GDF_##DTYPE: { \ + using value_type = gdf_dtype_traits::value_type; \ + Replace(static_cast(column->data), \ + 
static_cast(column->size), \ + static_cast(to_replace->data), \ + static_cast(values->data), \ + static_cast(values->size)); \ } break WHEN(INT8); @@ -134,7 +143,7 @@ gdf_replace(gdf_column * column, #undef WHEN case GDF_invalid: - default: return GDF_CUDA_ERROR; + default: return GDF_UNSUPPORTED_DTYPE; } return GDF_SUCCESS; From 0f3891dc938a4e69833b57f5d483b70d5974d7a5 Mon Sep 17 00:00:00 2001 From: gcca Date: Tue, 4 Sep 2018 08:41:16 -0500 Subject: [PATCH 10/30] [replace-function] Check device attribute status --- src/replace.cu | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 1c4fd333..fc031397 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -51,12 +51,11 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); template __global__ void replace_kernel(T *const data, - const std::ptrdiff_t data_ptrdiff, + const std::size_t data_size, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; - i < data_ptrdiff; + for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < data_size; i += blockDim.x * gridDim.x) { const thrust::device_ptr begin(to_replace); const thrust::device_ptr end(begin + replacement_ptrdiff); @@ -72,25 +71,30 @@ replace_kernel(T *const data, } template -static inline void +static inline gdf_error Replace(T *const data, - const std::ptrdiff_t data_ptrdiff, + const std::size_t data_size, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { int multiprocessors; // TODO: device selection - cudaDeviceGetAttribute(&multiprocessors, cudaDevAttrMultiProcessorCount, 0); + const cudaError_t status = cudaDeviceGetAttribute( + &multiprocessors, cudaDevAttrMultiProcessorCount, 0); - std::size_t blocks = std::ceil(data_ptrdiff / (multiprocessors * 256.)); + if (status != cudaSuccess) { return GDF_CUDA_ERROR; } + + const std::size_t 
blocks = std::ceil(data_size / (multiprocessors * 256.)); replace_kernel <<>>( // TODO: calc blocks and threads data, - data_ptrdiff, + data_size, to_replace, values, replacement_ptrdiff); + + return GDF_SUCCESS; } static inline bool @@ -123,12 +127,12 @@ gdf_replace(gdf_column * column, #define WHEN(DTYPE) \ case GDF_##DTYPE: { \ using value_type = gdf_dtype_traits::value_type; \ - Replace(static_cast(column->data), \ - static_cast(column->size), \ - static_cast(to_replace->data), \ - static_cast(values->data), \ - static_cast(values->size)); \ - } break + return Replace(static_cast(column->data), \ + static_cast(column->size), \ + static_cast(to_replace->data), \ + static_cast(values->data), \ + static_cast(values->size)); \ + } WHEN(INT8); WHEN(INT16); @@ -145,6 +149,4 @@ gdf_replace(gdf_column * column, case GDF_invalid: default: return GDF_UNSUPPORTED_DTYPE; } - - return GDF_SUCCESS; } From 26c522a31edcd9d35711d4632a8ee0145c65a8fd Mon Sep 17 00:00:00 2001 From: gcca Date: Tue, 4 Sep 2018 09:20:29 -0500 Subject: [PATCH 11/30] [replace-function] Move ptr construction (invariant) --- src/replace.cu | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index fc031397..7602412b 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -50,22 +50,23 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); template __global__ void -replace_kernel(T *const data, - const std::size_t data_size, - const T *const to_replace, - const T *const values, - const std::ptrdiff_t replacement_ptrdiff) { +replace_kernel(T *const data, + const std::size_t data_size, + const T *const values, + const thrust::device_ptr to_replace_begin, + const thrust::device_ptr to_replace_end) { for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < data_size; i += blockDim.x * gridDim.x) { - const thrust::device_ptr begin(to_replace); - const thrust::device_ptr end(begin + replacement_ptrdiff); + // TODO: find by map kernel + 
const thrust::device_ptr found_ptr = thrust::find( + thrust::device, to_replace_begin, to_replace_end, data[i]); - const thrust::device_ptr found = // TODO: find by map kernel - thrust::find(thrust::device, begin, end, data[i]); + if (found_ptr != to_replace_end) { + typename thrust::iterator_traits< + const thrust::device_ptr>::difference_type + value_found_index = thrust::distance(to_replace_begin, found_ptr); - if (found != end) { - std::size_t j = thrust::distance(begin, found); - data[i] = values[j]; + data[i] = values[value_found_index]; } } } @@ -86,13 +87,17 @@ Replace(T *const data, const std::size_t blocks = std::ceil(data_size / (multiprocessors * 256.)); + const thrust::device_ptr to_replace_begin(to_replace); + const thrust::device_ptr to_replace_end(to_replace_begin + + replacement_ptrdiff); + replace_kernel <<>>( // TODO: calc blocks and threads data, data_size, - to_replace, values, - replacement_ptrdiff); + to_replace_begin, + to_replace_end); return GDF_SUCCESS; } From 2a7a023316423149a34e3dd572e7c8ce9d602dd3 Mon Sep 17 00:00:00 2001 From: gcca Date: Wed, 5 Sep 2018 10:06:35 -0500 Subject: [PATCH 12/30] [replace-function] Add replace benchmark against cpu --- src/replace.cu | 11 +-- src/tests/replace/CMakeLists.txt | 56 +++++++++++++ src/tests/replace/replace-benchmark.cu | 110 +++++++++++++++++++++++++ src/tests/replace/replace-test.cu | 35 +------- src/tests/replace/utils.h | 64 ++++++++++++++ 5 files changed, 233 insertions(+), 43 deletions(-) create mode 100644 src/tests/replace/replace-benchmark.cu create mode 100644 src/tests/replace/utils.h diff --git a/src/replace.cu b/src/replace.cu index 7602412b..3edf5124 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -78,21 +78,14 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - int multiprocessors; - // TODO: device selection - const cudaError_t status = cudaDeviceGetAttribute( - &multiprocessors, 
cudaDevAttrMultiProcessorCount, 0); - - if (status != cudaSuccess) { return GDF_CUDA_ERROR; } - - const std::size_t blocks = std::ceil(data_size / (multiprocessors * 256.)); + const std::size_t blocks = std::ceil(data_size / 256.); const thrust::device_ptr to_replace_begin(to_replace); const thrust::device_ptr to_replace_end(to_replace_begin + replacement_ptrdiff); replace_kernel - <<>>( // TODO: calc blocks and threads + <<>>( // TODO: calc blocks and threads data, data_size, values, diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt index def5158d..3c9c1410 100644 --- a/src/tests/replace/CMakeLists.txt +++ b/src/tests/replace/CMakeLists.txt @@ -1 +1,57 @@ +#============================================================================= +# Copyright 2018 BlazingDB, Inc. +# Copyright 2018 Cristhian Alberto Gonzales Castillo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + configure_test(replace-test replace-test.cu) + +if (GDF_BENCHMARK) +include(ExternalProject) + +ExternalProject_Add(benchmark_ep + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=RELEASE + -DCMAKE_INSTALL_PREFIX=build + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.4.1 + UPDATE_COMMAND "" +) +ExternalProject_Get_property(benchmark_ep BINARY_DIR) +set(BENCHMARK_ROOT ${BINARY_DIR}/build) + +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) + +add_library(Google::Benchmark INTERFACE IMPORTED) +add_dependencies(Google::Benchmark benchmark_ep) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) + +add_library(Google::Benchmark::Main INTERFACE IMPORTED) +set_target_properties(Google::Benchmark::Main +PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) + +function(GDF_ADD_BENCHMARK TARGET) + list(REMOVE_AT ARGV 0) + cuda_add_executable(${TARGET} ${ARGV}) + target_link_libraries(${TARGET} + Google::Benchmark Google::Benchmark::Main gdf) +endfunction() + +GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) +endif() diff --git a/src/tests/replace/replace-benchmark.cu b/src/tests/replace/replace-benchmark.cu new file mode 100644 index 00000000..3097f7e8 --- /dev/null +++ b/src/tests/replace/replace-benchmark.cu @@ -0,0 +1,110 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include + +#include + +#include "utils.h" + +using T = std::int64_t; + +static void +BM_CPU_LoopReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + std::vector vector(length); + thrust::sequence(vector.begin(), vector.end(), 1); + + std::vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + + std::vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + + for (auto _ : state) { + for (std::size_t i = 0; i < vector.size(); i++) { + auto current = std::find( + to_replace_vector.begin(), to_replace_vector.end(), vector[i]); + if (current != to_replace_vector.end()) { + std::size_t j = + std::distance(to_replace_vector.begin(), current); + vector[i] = values_vector[j]; + } + } + } +} + +static void +BM_CPU_MapReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + std::vector vector(length); + thrust::sequence(vector.begin(), vector.end(), 1); + + std::vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + + std::vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + + for (auto _ : state) { + std::unordered_map map; + for (std::size_t i = 0; i < values_vector.size(); i++) { + map.insert({to_replace_vector[i], values_vector[i]}); + } + + for (std::size_t i = 0; i < vector.size(); i++) { + try { + vector[i] = map[vector[i]]; + } catch (...) 
{ continue; } + } + } +} + +static void +BM_GPU_LoopReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + thrust::device_vector device_vector(length); + thrust::sequence(device_vector.begin(), device_vector.end(), 1); + gdf_column column = MakeGdfColumn(device_vector); + + thrust::device_vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + gdf_column to_replace = MakeGdfColumn(to_replace_vector); + + thrust::device_vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + gdf_column values = MakeGdfColumn(values_vector); + + for (auto _ : state) { + const gdf_error status = gdf_replace(&column, &to_replace, &values); + state.PauseTiming(); + if (status != GDF_SUCCESS) { state.SkipWithError("Failed replace"); } + state.ResumeTiming(); + } +} + +BENCHMARK(BM_CPU_LoopReplace)->Ranges({{8, 8 << 16}, {8, 512}}); +BENCHMARK(BM_CPU_MapReplace)->Ranges({{8, 8 << 16}, {8, 512}}); +BENCHMARK(BM_GPU_LoopReplace)->Ranges({{8, 8 << 16}, {8, 512}}); diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 3af1232d..ad98e0cf 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -17,29 +17,9 @@ #include -#include - #include -#include - -template -struct TypeTraits {}; - -#define TYPE_FACTORY(U, D) \ - template <> \ - struct TypeTraits { \ - static constexpr gdf_dtype dtype = GDF_##D; \ - } - -TYPE_FACTORY(std::int8_t, INT8); -TYPE_FACTORY(std::int16_t, INT16); -TYPE_FACTORY(std::int32_t, INT32); -TYPE_FACTORY(std::int64_t, INT64); -TYPE_FACTORY(float, FLOAT32); -TYPE_FACTORY(double, FLOAT64); - -#undef TYPE_FACTORY +#include "utils.h" template static inline thrust::device_vector @@ -49,19 +29,6 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } -template -static inline gdf_column -MakeGdfColumn(thrust::device_vector &device_vector) { - return gdf_column{ - .data = 
thrust::raw_pointer_cast(device_vector.data()), - .valid = nullptr, - .size = device_vector.size(), - .dtype = TypeTraits::dtype, - .null_count = 0, - .dtype_info = {}, - }; -} - template class ReplaceTest : public testing::Test { protected: diff --git a/src/tests/replace/utils.h b/src/tests/replace/utils.h new file mode 100644 index 00000000..2d5332f1 --- /dev/null +++ b/src/tests/replace/utils.h @@ -0,0 +1,64 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +template +struct TypeTraits {}; + +#define TYPE_FACTORY(U, D) \ + template <> \ + struct TypeTraits { \ + static constexpr gdf_dtype dtype = GDF_##D; \ + } + +TYPE_FACTORY(std::int8_t, INT8); +TYPE_FACTORY(std::int16_t, INT16); +TYPE_FACTORY(std::int32_t, INT32); +TYPE_FACTORY(std::int64_t, INT64); +TYPE_FACTORY(float, FLOAT32); +TYPE_FACTORY(double, FLOAT64); + +#undef TYPE_FACTORY + +template +static inline gdf_column +MakeGdfColumn(thrust::device_vector &device_vector) { + return gdf_column{ + .data = thrust::raw_pointer_cast(device_vector.data()), + .valid = nullptr, + .size = device_vector.size(), + .dtype = TypeTraits::dtype, + .null_count = 0, + .dtype_info = {}, + }; +} + +template +static inline gdf_column +MakeGdfColumn(std::vector &vector) { + return gdf_column{ + .data = vector.data(), + .valid = nullptr, + .size = vector.size(), + .dtype = TypeTraits::dtype, + .null_count = 0, + .dtype_info = {}, + }; +} From a2fa767ff4b8880184cd00ca41cf20d40107fc75 Mon Sep 17 00:00:00 2001 From: William Malpica Date: Fri, 28 Sep 2018 08:53:00 -0500 Subject: [PATCH 13/30] moved replace benchmark to bench folder. 
Added comments and more tests to replace-test --- CMakeLists.txt | 5 +++ .../Templates/GoogleTest.CMakeLists.txt.cmake | 2 +- src/bench/CMakeLists.txt | 43 +++++++++++++++++++ src/bench/replace/CMakeLists.txt | 22 ++++++++++ .../replace/replace-benchmark.cu | 2 +- src/tests/replace/CMakeLists.txt | 38 ---------------- src/tests/replace/replace-test.cu | 43 ++++++++++++++++++- 7 files changed, 114 insertions(+), 41 deletions(-) create mode 100644 src/bench/CMakeLists.txt create mode 100644 src/bench/replace/CMakeLists.txt rename src/{tests => bench}/replace/replace-benchmark.cu (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae20b140..2cd98cf9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,5 +196,10 @@ if(GTEST_FOUND) else() message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") endif() + +if(GDF_BENCHMARK) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/bench) + endif() + # Print the project summary feature_summary(WHAT ALL INCLUDE_QUIET_PACKAGES FATAL_ON_MISSING_REQUIRED_PACKAGES) diff --git a/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cmake/Templates/GoogleTest.CMakeLists.txt.cmake index 0af9e67c..c83d43eb 100644 --- a/cmake/Templates/GoogleTest.CMakeLists.txt.cmake +++ b/cmake/Templates/GoogleTest.CMakeLists.txt.cmake @@ -24,7 +24,7 @@ include(ExternalProject) ExternalProject_Add(googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG master + GIT_TAG release-1.8.1 SOURCE_DIR "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/thirdparty/googletest-src" BINARY_DIR "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/thirdparty/googletest-build" INSTALL_DIR "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/thirdparty/googletest-install" diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt new file mode 100644 index 00000000..9facb01f --- /dev/null +++ b/src/bench/CMakeLists.txt @@ -0,0 +1,43 @@ + +if(GDF_BENCHMARK) + + include(ExternalProject) + + 
ExternalProject_Add(benchmark_ep + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=RELEASE + -DCMAKE_INSTALL_PREFIX=build + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.4.1 + UPDATE_COMMAND "" + ) + ExternalProject_Get_property(benchmark_ep BINARY_DIR) + set(BENCHMARK_ROOT ${BINARY_DIR}/build) + + file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) + file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) + + add_library(Google::Benchmark INTERFACE IMPORTED) + add_dependencies(Google::Benchmark benchmark_ep) + set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) + set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) + + add_library(Google::Benchmark::Main INTERFACE IMPORTED) + set_target_properties(Google::Benchmark::Main + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) + + +function(GDF_ADD_BENCHMARK TARGET) + list(REMOVE_AT ARGV 0) + cuda_add_executable(${TARGET} ${ARGV}) + target_link_libraries(${TARGET} + Google::Benchmark Google::Benchmark::Main gdf) +endfunction() + +endif() + + + add_subdirectory(replace) \ No newline at end of file diff --git a/src/bench/replace/CMakeLists.txt b/src/bench/replace/CMakeLists.txt new file mode 100644 index 00000000..76bc607d --- /dev/null +++ b/src/bench/replace/CMakeLists.txt @@ -0,0 +1,22 @@ +#============================================================================= +# Copyright 2018 BlazingDB, Inc. +# Copyright 2018 Cristhian Alberto Gonzales Castillo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + + +if (GDF_BENCHMARK) + +GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) +endif() diff --git a/src/tests/replace/replace-benchmark.cu b/src/bench/replace/replace-benchmark.cu similarity index 98% rename from src/tests/replace/replace-benchmark.cu rename to src/bench/replace/replace-benchmark.cu index 3097f7e8..03a8e805 100644 --- a/src/tests/replace/replace-benchmark.cu +++ b/src/bench/replace/replace-benchmark.cu @@ -24,7 +24,7 @@ #include -#include "utils.h" +#include "../../tests/replace/utils.h" using T = std::int64_t; diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt index 3c9c1410..7da0c2f9 100644 --- a/src/tests/replace/CMakeLists.txt +++ b/src/tests/replace/CMakeLists.txt @@ -17,41 +17,3 @@ configure_test(replace-test replace-test.cu) -if (GDF_BENCHMARK) -include(ExternalProject) - -ExternalProject_Add(benchmark_ep - CMAKE_ARGS - -DCMAKE_BUILD_TYPE=RELEASE - -DCMAKE_INSTALL_PREFIX=build - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.4.1 - UPDATE_COMMAND "" -) -ExternalProject_Get_property(benchmark_ep BINARY_DIR) -set(BENCHMARK_ROOT ${BINARY_DIR}/build) - -file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) -file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) - -add_library(Google::Benchmark INTERFACE IMPORTED) -add_dependencies(Google::Benchmark benchmark_ep) -set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) 
-set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) - -add_library(Google::Benchmark::Main INTERFACE IMPORTED) -set_target_properties(Google::Benchmark::Main -PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) - -function(GDF_ADD_BENCHMARK TARGET) - list(REMOVE_AT ARGV 0) - cuda_add_executable(${TARGET} ${ARGV}) - target_link_libraries(${TARGET} - Google::Benchmark Google::Benchmark::Main gdf) -endfunction() - -GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) -endif() diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index ad98e0cf..253f1cbf 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -29,6 +29,9 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } + + +// This is the main teast feature template class ReplaceTest : public testing::Test { protected: @@ -65,6 +68,7 @@ using Types = testing:: TYPED_TEST_CASE(ReplaceTest, Types); +// Simple test, replacing all even values TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { thrust::device_ptr results = this->test({1, 2, 3, 4, 5, 6, 7, 8}, {2, 4, 6, 8}, {0, 2, 4, 6}); @@ -75,9 +79,11 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { EXPECT_EQ(6, results[7]); } + +// Similar test as ReplaceEvenPosition, but with unordered data TYPED_TEST(ReplaceTest, Unordered) { thrust::device_ptr results = - this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 4, 6, 8}, {0, 2, 4, 6}); + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 6, 4, 8}, {0, 4, 2, 6}); EXPECT_EQ(4, results[2]); EXPECT_EQ(0, results[5]); @@ -86,8 +92,43 @@ TYPED_TEST(ReplaceTest, Unordered) { } +// Testing with Empty Replace +TYPED_TEST(ReplaceTest, EmptyReplace) { + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {}, {}); + + EXPECT_EQ(7, results[0]); + EXPECT_EQ(5, results[1]); + EXPECT_EQ(6, results[2]); + EXPECT_EQ(3, results[3]); + EXPECT_EQ(1, results[4]); + 
EXPECT_EQ(2, results[5]); + EXPECT_EQ(8, results[6]); + EXPECT_EQ(4, results[7]); +} + +// Testing with Nothing To Replace +TYPED_TEST(ReplaceTest, NothingToReplace) { + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {10, 11, 12}, {15, 16, 17}); + + EXPECT_EQ(7, results[0]); + EXPECT_EQ(5, results[1]); + EXPECT_EQ(6, results[2]); + EXPECT_EQ(3, results[3]); + EXPECT_EQ(1, results[4]); + EXPECT_EQ(2, results[5]); + EXPECT_EQ(8, results[6]); + EXPECT_EQ(4, results[7]); +} + +// Testing with Empty Data +TYPED_TEST(ReplaceTest, EmptyData) { + this->test({}, {10, 11, 12}, {15, 16, 17}); +} +// Test with much larger data sets TEST(LargeScaleReplaceTest, LargeScaleReplaceTest) { { From cc3beca39cc40e2407767facdae572d3b35eba52 Mon Sep 17 00:00:00 2001 From: gcca Date: Wed, 22 Aug 2018 13:43:19 -0500 Subject: [PATCH 14/30] [replace-function] API definition --- CMakeLists.txt | 1 + include/gdf/cffi/functions.h | 14 ++++++- src/replace.cu | 25 ++++++++++++ src/tests/CMakeLists.txt | 1 + src/tests/replace/CMakeLists.txt | 1 + src/tests/replace/replace-test.cu | 65 +++++++++++++++++++++++++++++++ 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 src/replace.cu create mode 100644 src/tests/replace/CMakeLists.txt create mode 100644 src/tests/replace/replace-test.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 970d29b2..14c2f6aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,6 +139,7 @@ cuda_add_library(gdf SHARED src/sqls_ops.cu src/streamcompactionops.cu src/unaryops.cu + src/replace.cu #src/windowedops.cu src/quantiles.cu src/io/csv/csv-reader.cu diff --git a/include/gdf/cffi/functions.h b/include/gdf/cffi/functions.h index 610e756d..2f5e7863 100644 --- a/include/gdf/cffi/functions.h +++ b/include/gdf/cffi/functions.h @@ -659,7 +659,7 @@ gdf_error gpu_hash_columns(gdf_column ** columns_to_hash, int num_columns, gdf_c gdf_error get_column_byte_width(gdf_column * col, int * width); -/* +/* Multi-Column SQL ops: WHERE 
(Filtering) ORDER-BY @@ -740,3 +740,15 @@ gdf_error gdf_quantile_aprrox( gdf_column* col_in, //input column; double q, //requested quantile in [0,1] void* t_erased_res, //type-erased result of same type as column; gdf_context* ctxt); //context info + +/* replace */ + +/// \brief Replace `to_replace` data of `column` with `values` +/// \param[in/out] column data +/// \param[in] to_replace contains values of column that will be replaced +/// \param[in] values contains the replacement values +/// +/// Note that `to_replace` and `values` are related by the index +gdf_error gdf_replace(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values); diff --git a/src/replace.cu b/src/replace.cu new file mode 100644 index 00000000..18c745c0 --- /dev/null +++ b/src/replace.cu @@ -0,0 +1,25 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +gdf_error +gdf_replace(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values) { + return GDF_CUDA_ERROR; +} diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 4b4a5cf8..b0445d94 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -50,5 +50,6 @@ add_subdirectory(filterops_numeric) add_subdirectory(quantiles) add_subdirectory(validops) add_subdirectory(csv) +add_subdirectory(replace) message(STATUS "******** Tests are ready ********") diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt new file mode 100644 index 00000000..def5158d --- /dev/null +++ b/src/tests/replace/CMakeLists.txt @@ -0,0 +1 @@ +configure_test(replace-test replace-test.cu) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu new file mode 100644 index 00000000..fb496b4f --- /dev/null +++ b/src/tests/replace/replace-test.cu @@ -0,0 +1,65 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include +#include + +#include + +static gdf_column +CreateGdfColumn(const std::initializer_list list) { + const std::vector host_data(list); + thrust::device_vector device_data(host_data); + thrust::device_vector device_valid(1, 0); + + return gdf_column{ + .data = thrust::raw_pointer_cast(device_data.data()), + .valid = thrust::raw_pointer_cast(device_valid.data()), + .size = 0, + .dtype = GDF_INT64, + .null_count = 0, + .dtype_info = {}, + }; +} + +TEST(ReplaceTest, API) { + gdf_column column = CreateGdfColumn({1, 2, 3, 4, 5, 6, 7, 8}); + + gdf_column to_replace = CreateGdfColumn({2, 4, 6, 8}); + gdf_column values = CreateGdfColumn({0, 2, 4, 6}); + + const gdf_error status = gdf_replace(&column, &to_replace, &values); + + EXPECT_EQ(GDF_SUCCESS, status); + + const thrust::device_ptr data_ptr( + static_cast(column.data)); + + constexpr std::ptrdiff_t ptrdiff = 8; + + const thrust::device_vector device_data(data_ptr, + data_ptr + ptrdiff); + + EXPECT_EQ(0, device_data[1]); + EXPECT_EQ(2, device_data[3]); + EXPECT_EQ(4, device_data[5]); + EXPECT_EQ(6, device_data[7]); +} From ef4de001570788f150a519e3b81ed4818682aadc Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 12:27:52 -0500 Subject: [PATCH 15/30] [replace-function] Add first implementation --- src/replace.cu | 94 ++++++++++++++++++++++++++++++- src/tests/replace/replace-test.cu | 52 +++++++++-------- 2 files changed, 120 insertions(+), 26 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 18c745c0..162e35f4 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,11 +15,103 @@ * limitations under the License. 
*/ +#include +#include +#include + #include +namespace { + +template +struct gdf_dtype_traits {}; + +#define DTYPE_FACTORY(DTYPE, T) \ + template <> \ + struct gdf_dtype_traits { \ + typedef T value_type; \ + } + +DTYPE_FACTORY(INT8, std::int8_t); +DTYPE_FACTORY(INT16, std::int16_t); +DTYPE_FACTORY(INT32, std::int32_t); +DTYPE_FACTORY(INT64, std::int64_t); +DTYPE_FACTORY(FLOAT32, float); +DTYPE_FACTORY(FLOAT64, double); +DTYPE_FACTORY(DATE32, std::int32_t); +DTYPE_FACTORY(DATE64, std::int64_t); +DTYPE_FACTORY(TIMESTAMP, std::int64_t); + +#undef DTYPE_FACTORY + +template +static inline void +Replace(T *const data, + const std::size_t data_size, + T *const to_replace, + T *const values, + const std::size_t replacement_size) { + thrust::device_ptr begin(data); + thrust::device_ptr end = begin + static_cast(data_size); + + thrust::device_ptr from(to_replace); + thrust::device_ptr to(values); + + for (std::size_t i = 0; i < replacement_size; i++) { + thrust::replace(begin, end, from[i], to[i]); + } +} + +static inline bool +NotEqualReplacementSize(const gdf_column *to_replace, + const gdf_column *values) { + return to_replace->size != values->size; +} + +static inline bool +NotSameDType(const gdf_column *column, + const gdf_column *to_replace, + const gdf_column *values) { + return column->dtype != to_replace->dtype + || to_replace->dtype != values->dtype; +} + +} // namespace + gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - return GDF_CUDA_ERROR; + if (NotEqualReplacementSize(to_replace, values)) { return GDF_CUDA_ERROR; } + + if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } + + switch (column->dtype) { +#define WHEN(DTYPE) \ + case GDF_##DTYPE: { \ + using value_type = gdf_dtype_traits::value_type; \ + Replace(static_cast(column->data), \ + column->size, \ + static_cast(to_replace->data), \ + static_cast(values->data), \ + values->size); \ + } break + + WHEN(INT8); + WHEN(INT16); + 
WHEN(INT32); + WHEN(INT64); + WHEN(FLOAT32); + WHEN(FLOAT64); + WHEN(DATE32); + WHEN(DATE64); + WHEN(TIMESTAMP); + +#undef WHEN + + case GDF_invalid: + default: return GDF_CUDA_ERROR; + } + + return GDF_SUCCESS; } diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index fb496b4f..803a737e 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -17,23 +17,24 @@ #include -#include - -#include #include #include -static gdf_column -CreateGdfColumn(const std::initializer_list list) { - const std::vector host_data(list); - thrust::device_vector device_data(host_data); - thrust::device_vector device_valid(1, 0); +template +static inline thrust::device_vector +MakeDeviceVector(const std::initializer_list list) { + const std::vector column_data(list); + thrust::device_vector device_data(column_data); + return device_data; +} +static inline gdf_column +MakeGdfColumn(thrust::device_vector &device_vector) { return gdf_column{ - .data = thrust::raw_pointer_cast(device_data.data()), - .valid = thrust::raw_pointer_cast(device_valid.data()), - .size = 0, + .data = thrust::raw_pointer_cast(device_vector.data()), + .valid = nullptr, + .size = device_vector.size(), .dtype = GDF_INT64, .null_count = 0, .dtype_info = {}, @@ -41,25 +42,26 @@ CreateGdfColumn(const std::initializer_list list) { } TEST(ReplaceTest, API) { - gdf_column column = CreateGdfColumn({1, 2, 3, 4, 5, 6, 7, 8}); + thrust::device_vector device_data = + MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); + gdf_column column = MakeGdfColumn(device_data); + + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); - gdf_column to_replace = CreateGdfColumn({2, 4, 6, 8}); - gdf_column values = CreateGdfColumn({0, 2, 4, 6}); + gdf_column to_replace = MakeGdfColumn(to_replace_data); + gdf_column values = MakeGdfColumn(values_data); const gdf_error status = gdf_replace(&column, 
&to_replace, &values); EXPECT_EQ(GDF_SUCCESS, status); - const thrust::device_ptr data_ptr( + thrust::device_ptr results( static_cast(column.data)); - - constexpr std::ptrdiff_t ptrdiff = 8; - - const thrust::device_vector device_data(data_ptr, - data_ptr + ptrdiff); - - EXPECT_EQ(0, device_data[1]); - EXPECT_EQ(2, device_data[3]); - EXPECT_EQ(4, device_data[5]); - EXPECT_EQ(6, device_data[7]); + EXPECT_EQ(0, results[1]); + EXPECT_EQ(2, results[3]); + EXPECT_EQ(4, results[5]); + EXPECT_EQ(6, results[7]); } From 0b62cd9796eedf070a98d591e00d8f65bc5d39b2 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 18:10:59 -0500 Subject: [PATCH 16/30] [replace-function] Add replacement by lower bound --- src/replace.cu | 54 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 162e35f4..3c8a6299 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,9 +15,10 @@ * limitations under the License. */ +#include #include #include -#include +#include #include @@ -44,22 +45,51 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY +template +class ReplaceFunctor { +public: + ReplaceFunctor(T *const raw) : data(raw) {} + + __host__ __device__ void + operator()(thrust::tuple tuple) { + const T i = thrust::get<0>(tuple); + + if (data[i] == thrust::get<1>(tuple)) { + data[i] = thrust::get<2>(tuple); + } + } + + thrust::device_ptr data; +}; + template static inline void Replace(T *const data, const std::size_t data_size, - T *const to_replace, - T *const values, + const T *const to_replace, + const T *const values, const std::size_t replacement_size) { - thrust::device_ptr begin(data); - thrust::device_ptr end = begin + static_cast(data_size); - - thrust::device_ptr from(to_replace); - thrust::device_ptr to(values); - - for (std::size_t i = 0; i < replacement_size; i++) { - thrust::replace(begin, end, from[i], to[i]); - } + const thrust::device_ptr data_begin(data); + const 
thrust::device_ptr data_end = + data_begin + static_cast(data_size); + + const thrust::device_ptr from_begin(to_replace); + const thrust::device_ptr from_end = + from_begin + static_cast(replacement_size); + + const thrust::device_ptr to_begin(values); + const thrust::device_ptr to_end = + to_begin + static_cast(replacement_size); + + thrust::device_vector lower_bounds(replacement_size); + thrust::lower_bound( + data_begin, data_end, from_begin, from_end, lower_bounds.begin()); + + thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple( + lower_bounds.cbegin(), from_begin, to_begin)), + thrust::make_zip_iterator(thrust::make_tuple( + lower_bounds.cend(), from_end, to_end)), + ReplaceFunctor(data)); } static inline bool From 5f2c338d7407a6250714b02eb0986382b16c2be9 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 23 Aug 2018 19:10:15 -0500 Subject: [PATCH 17/30] [replace-function] Update typed unit test --- src/tests/replace/replace-test.cu | 48 ++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 803a737e..eb93a6a1 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -21,6 +21,24 @@ #include +template +struct TypeTraits {}; + +#define TYPE_FACTORY(U, D) \ + template <> \ + struct TypeTraits { \ + static constexpr gdf_dtype dtype = GDF_##D; \ + } + +TYPE_FACTORY(std::int8_t, INT8); +TYPE_FACTORY(std::int16_t, INT16); +TYPE_FACTORY(std::int32_t, INT32); +TYPE_FACTORY(std::int64_t, INT64); +TYPE_FACTORY(float, FLOAT32); +TYPE_FACTORY(double, FLOAT64); + +#undef TYPE_FACTORY + template static inline thrust::device_vector MakeDeviceVector(const std::initializer_list list) { @@ -29,27 +47,35 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } +template static inline gdf_column -MakeGdfColumn(thrust::device_vector &device_vector) { +MakeGdfColumn(thrust::device_vector &device_vector) { 
return gdf_column{ .data = thrust::raw_pointer_cast(device_vector.data()), .valid = nullptr, .size = device_vector.size(), - .dtype = GDF_INT64, + .dtype = TypeTraits::dtype, .null_count = 0, .dtype_info = {}, }; } -TEST(ReplaceTest, API) { - thrust::device_vector device_data = - MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); +template +class ReplaceTest : public testing::Test {}; + +using Types = testing:: + Types; +TYPED_TEST_CASE(ReplaceTest, Types); + +TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { + thrust::device_vector device_data = + MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); gdf_column column = MakeGdfColumn(device_data); - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = - MakeDeviceVector({0, 2, 4, 6}); + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); gdf_column to_replace = MakeGdfColumn(to_replace_data); gdf_column values = MakeGdfColumn(values_data); @@ -58,8 +84,8 @@ TEST(ReplaceTest, API) { EXPECT_EQ(GDF_SUCCESS, status); - thrust::device_ptr results( - static_cast(column.data)); + thrust::device_ptr results( + static_cast(column.data)); EXPECT_EQ(0, results[1]); EXPECT_EQ(2, results[3]); EXPECT_EQ(4, results[5]); From 35c765d54a7652937c093641e984e82174a634a1 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 08:35:34 -0500 Subject: [PATCH 18/30] [replace-function] Update to replace on unordered column --- src/replace.cu | 65 ++++++++++++------------------- src/tests/replace/replace-test.cu | 26 +++++++++++++ 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 3c8a6299..9583f7d0 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,10 +15,11 @@ * limitations under the License. 
*/ -#include #include #include +#include #include +#include #include @@ -45,51 +46,33 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY -template -class ReplaceFunctor { -public: - ReplaceFunctor(T *const raw) : data(raw) {} - - __host__ __device__ void - operator()(thrust::tuple tuple) { - const T i = thrust::get<0>(tuple); - - if (data[i] == thrust::get<1>(tuple)) { - data[i] = thrust::get<2>(tuple); - } - } - - thrust::device_ptr data; -}; - template static inline void -Replace(T *const data, - const std::size_t data_size, - const T *const to_replace, - const T *const values, - const std::size_t replacement_size) { - const thrust::device_ptr data_begin(data); - const thrust::device_ptr data_end = - data_begin + static_cast(data_size); +Replace(T *const data, + const std::ptrdiff_t data_ptrdiff, + const T *const to_replace, + const T *const values, + const std::ptrdiff_t replacement_ptrdiff) { + const thrust::device_ptr data_begin(data); + const thrust::device_ptr data_end = data_begin + data_ptrdiff; const thrust::device_ptr from_begin(to_replace); const thrust::device_ptr from_end = - from_begin + static_cast(replacement_size); + from_begin + replacement_ptrdiff; const thrust::device_ptr to_begin(values); - const thrust::device_ptr to_end = - to_begin + static_cast(replacement_size); - - thrust::device_vector lower_bounds(replacement_size); - thrust::lower_bound( - data_begin, data_end, from_begin, from_end, lower_bounds.begin()); - - thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple( - lower_bounds.cbegin(), from_begin, to_begin)), - thrust::make_zip_iterator(thrust::make_tuple( - lower_bounds.cend(), from_end, to_end)), - ReplaceFunctor(data)); + const thrust::device_ptr to_end = to_begin + replacement_ptrdiff; + + thrust::for_each( + thrust::device, + thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), + thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), + [=] __device__(thrust::tuple tuple) { + const 
T from = thrust::get<0>(tuple); + const T to = thrust::get<1>(tuple); + + thrust::replace(thrust::device, data_begin, data_end, from, to); + }); } static inline bool @@ -121,10 +104,10 @@ gdf_replace(gdf_column * column, case GDF_##DTYPE: { \ using value_type = gdf_dtype_traits::value_type; \ Replace(static_cast(column->data), \ - column->size, \ + static_cast(column->size), \ static_cast(to_replace->data), \ static_cast(values->data), \ - values->size); \ + static_cast(values->size)); \ } break WHEN(INT8); diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index eb93a6a1..e9f785ec 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -65,6 +65,7 @@ class ReplaceTest : public testing::Test {}; using Types = testing:: Types; + TYPED_TEST_CASE(ReplaceTest, Types); TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { @@ -91,3 +92,28 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { EXPECT_EQ(4, results[5]); EXPECT_EQ(6, results[7]); } + +TYPED_TEST(ReplaceTest, Unordered) { + thrust::device_vector device_data = + MakeDeviceVector({7, 5, 6, 3, 1, 2, 8, 4}); + gdf_column column = MakeGdfColumn(device_data); + + thrust::device_vector to_replace_data = + MakeDeviceVector({2, 4, 6, 8}); + thrust::device_vector values_data = + MakeDeviceVector({0, 2, 4, 6}); + + gdf_column to_replace = MakeGdfColumn(to_replace_data); + gdf_column values = MakeGdfColumn(values_data); + + const gdf_error status = gdf_replace(&column, &to_replace, &values); + + EXPECT_EQ(GDF_SUCCESS, status); + + thrust::device_ptr results( + static_cast(column.data)); + EXPECT_EQ(4, results[2]); + EXPECT_EQ(0, results[5]); + EXPECT_EQ(6, results[6]); + EXPECT_EQ(2, results[7]); +} From d3e50d31cf50142b727d39a840137bb7685c265b Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 12:12:28 -0500 Subject: [PATCH 19/30] [replace-function] Update class replace functor --- src/replace.cu | 29 ++++++++++++++++++++--------- 1 file changed, 20 
insertions(+), 9 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 9583f7d0..86c6f952 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -46,6 +46,25 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY +template +class ReplaceFunctor { +public: + ReplaceFunctor(T *const data, const std::ptrdiff_t data_ptrdiff) + : data_begin(data), data_end(data_begin + data_ptrdiff) {} + + void __device__ + operator()(thrust::tuple tuple) { + const T from = thrust::get<0>(tuple); + const T to = thrust::get<1>(tuple); + + thrust::replace(thrust::device, data_begin, data_end, from, to); + } + +private: + const thrust::device_ptr data_begin; + const thrust::device_ptr data_end; +}; + template static inline void Replace(T *const data, @@ -53,9 +72,6 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - const thrust::device_ptr data_begin(data); - const thrust::device_ptr data_end = data_begin + data_ptrdiff; - const thrust::device_ptr from_begin(to_replace); const thrust::device_ptr from_end = from_begin + replacement_ptrdiff; @@ -67,12 +83,7 @@ Replace(T *const data, thrust::device, thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), - [=] __device__(thrust::tuple tuple) { - const T from = thrust::get<0>(tuple); - const T to = thrust::get<1>(tuple); - - thrust::replace(thrust::device, data_begin, data_end, from, to); - }); + ReplaceFunctor(data, data_ptrdiff)); } static inline bool From 7c12b1a12ad26268d085832e542f21f7f138ae60 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 30 Aug 2018 16:37:27 -0500 Subject: [PATCH 20/30] [replace-function] Common test fixtures --- src/tests/replace/replace-test.cu | 68 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index e9f785ec..7dbcf75e 100644 --- 
a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -61,32 +61,45 @@ MakeGdfColumn(thrust::device_vector &device_vector) { } template -class ReplaceTest : public testing::Test {}; +class ReplaceTest : public testing::Test { +protected: + thrust::device_ptr + test(const std::initializer_list &data_list, + const std::initializer_list &to_replace_list, + const std::initializer_list &values_list) { + device_data = MakeDeviceVector(data_list); + to_replace_data = MakeDeviceVector(to_replace_list); + values_data = MakeDeviceVector(values_list); -using Types = testing:: - Types; + column = MakeGdfColumn(device_data); + to_replace = MakeGdfColumn(to_replace_data); + values = MakeGdfColumn(values_data); -TYPED_TEST_CASE(ReplaceTest, Types); + const gdf_error status = gdf_replace(&column, &to_replace, &values); -TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { - thrust::device_vector device_data = - MakeDeviceVector({1, 2, 3, 4, 5, 6, 7, 8}); - gdf_column column = MakeGdfColumn(device_data); + EXPECT_EQ(GDF_SUCCESS, status); - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = - MakeDeviceVector({0, 2, 4, 6}); + return thrust::device_ptr(static_cast(column.data)); + } - gdf_column to_replace = MakeGdfColumn(to_replace_data); - gdf_column values = MakeGdfColumn(values_data); + thrust::device_vector device_data; + thrust::device_vector to_replace_data; + thrust::device_vector values_data; - const gdf_error status = gdf_replace(&column, &to_replace, &values); + gdf_column column; + gdf_column to_replace; + gdf_column values; +}; - EXPECT_EQ(GDF_SUCCESS, status); +using Types = testing:: + Types; + +TYPED_TEST_CASE(ReplaceTest, Types); + +TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { + thrust::device_ptr results = + this->test({1, 2, 3, 4, 5, 6, 7, 8}, {2, 4, 6, 8}, {0, 2, 4, 6}); - thrust::device_ptr results( - static_cast(column.data)); EXPECT_EQ(0, results[1]); EXPECT_EQ(2, results[3]); 
EXPECT_EQ(4, results[5]); @@ -94,24 +107,9 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { } TYPED_TEST(ReplaceTest, Unordered) { - thrust::device_vector device_data = - MakeDeviceVector({7, 5, 6, 3, 1, 2, 8, 4}); - gdf_column column = MakeGdfColumn(device_data); - - thrust::device_vector to_replace_data = - MakeDeviceVector({2, 4, 6, 8}); - thrust::device_vector values_data = - MakeDeviceVector({0, 2, 4, 6}); - - gdf_column to_replace = MakeGdfColumn(to_replace_data); - gdf_column values = MakeGdfColumn(values_data); - - const gdf_error status = gdf_replace(&column, &to_replace, &values); - - EXPECT_EQ(GDF_SUCCESS, status); + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 4, 6, 8}, {0, 2, 4, 6}); - thrust::device_ptr results( - static_cast(column.data)); EXPECT_EQ(4, results[2]); EXPECT_EQ(0, results[5]); EXPECT_EQ(6, results[6]); From c01252bce5f35d512333656ee3672cccf8706cf1 Mon Sep 17 00:00:00 2001 From: William Malpica Date: Fri, 31 Aug 2018 15:20:29 -0500 Subject: [PATCH 21/30] created larger scale test for replace funtion --- src/replace.cu | 2 +- src/tests/replace/replace-test.cu | 59 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/replace.cu b/src/replace.cu index 86c6f952..e59d5e7b 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -106,7 +106,7 @@ gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - if (NotEqualReplacementSize(to_replace, values)) { return GDF_CUDA_ERROR; } + if (NotEqualReplacementSize(to_replace, values)) { return GDF_COLUMN_SIZE_MISMATCH; } if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 7dbcf75e..3af1232d 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -21,6 +21,8 @@ #include +#include + template struct TypeTraits {}; @@ -115,3 +117,60 @@ 
TYPED_TEST(ReplaceTest, Unordered) { EXPECT_EQ(6, results[6]); EXPECT_EQ(2, results[7]); } + + + + +TEST(LargeScaleReplaceTest, LargeScaleReplaceTest) { + + { + const int DATA_SIZE = 1000000; + const int REPLACE_SIZE = 10000; + + srand((unsigned)time(NULL)); + + std::vector column_data(DATA_SIZE); + for (int i = 0; i < DATA_SIZE; i++){ + column_data[i] = rand() % (2*REPLACE_SIZE); + } + + std::vector from(DATA_SIZE); + std::vector to(DATA_SIZE); + int count = 0; + for (int i = 0; i < 7; i++){ + for (int j = 0; j < REPLACE_SIZE; j += 7){ + from[i+j] = count; + count++; + to[i+j] = count; + } + } + + thrust::device_vector device_data(column_data); + gdf_column data_gdf = MakeGdfColumn(device_data); + thrust::device_vector device_from(from); + gdf_column from_gdf = MakeGdfColumn(device_from); + thrust::device_vector device_to(to); + gdf_column to_gdf = MakeGdfColumn(device_to); + + const gdf_error status = gdf_replace(&data_gdf, &from_gdf, &to_gdf); + + EXPECT_EQ(GDF_SUCCESS, status); + + std::vector replaced_data(DATA_SIZE); + thrust::copy(device_data.begin(), device_data.end(), replaced_data.begin()); + + count = 0; + for (int i = 0; i < DATA_SIZE; i++){ + if (column_data[i] < REPLACE_SIZE){ + EXPECT_EQ(column_data[i] + 1, replaced_data[i]); + if (column_data[i] + 1 != replaced_data[i]){ + std::cout<<"failed at "< 20){ + break; + } + } + } + } + } +} From 321656e0baf6036d8d11831367aebbdd7218f156 Mon Sep 17 00:00:00 2001 From: gcca Date: Mon, 3 Sep 2018 16:47:05 -0500 Subject: [PATCH 22/30] [replace-function] Replace kernel --- src/replace.cu | 93 +++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index e59d5e7b..1c4fd333 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -15,6 +15,8 @@ * limitations under the License. 
*/ +#include + #include #include #include @@ -28,10 +30,10 @@ namespace { template struct gdf_dtype_traits {}; -#define DTYPE_FACTORY(DTYPE, T) \ - template <> \ - struct gdf_dtype_traits { \ - typedef T value_type; \ +#define DTYPE_FACTORY(DTYPE, T) \ + template <> \ + struct gdf_dtype_traits { \ + typedef T value_type; \ } DTYPE_FACTORY(INT8, std::int8_t); @@ -47,23 +49,27 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY template -class ReplaceFunctor { -public: - ReplaceFunctor(T *const data, const std::ptrdiff_t data_ptrdiff) - : data_begin(data), data_end(data_begin + data_ptrdiff) {} - - void __device__ - operator()(thrust::tuple tuple) { - const T from = thrust::get<0>(tuple); - const T to = thrust::get<1>(tuple); - - thrust::replace(thrust::device, data_begin, data_end, from, to); +__global__ void +replace_kernel(T *const data, + const std::ptrdiff_t data_ptrdiff, + const T *const to_replace, + const T *const values, + const std::ptrdiff_t replacement_ptrdiff) { + for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; + i < data_ptrdiff; + i += blockDim.x * gridDim.x) { + const thrust::device_ptr begin(to_replace); + const thrust::device_ptr end(begin + replacement_ptrdiff); + + const thrust::device_ptr found = // TODO: find by map kernel + thrust::find(thrust::device, begin, end, data[i]); + + if (found != end) { + std::size_t j = thrust::distance(begin, found); + data[i] = values[j]; + } } - -private: - const thrust::device_ptr data_begin; - const thrust::device_ptr data_end; -}; +} template static inline void @@ -72,18 +78,19 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - const thrust::device_ptr from_begin(to_replace); - const thrust::device_ptr from_end = - from_begin + replacement_ptrdiff; - - const thrust::device_ptr to_begin(values); - const thrust::device_ptr to_end = to_begin + replacement_ptrdiff; - - thrust::for_each( - thrust::device, - 
thrust::make_zip_iterator(thrust::make_tuple(from_begin, to_begin)), - thrust::make_zip_iterator(thrust::make_tuple(from_end, to_end)), - ReplaceFunctor(data, data_ptrdiff)); + int multiprocessors; + // TODO: device selection + cudaDeviceGetAttribute(&multiprocessors, cudaDevAttrMultiProcessorCount, 0); + + std::size_t blocks = std::ceil(data_ptrdiff / (multiprocessors * 256.)); + + replace_kernel + <<>>( // TODO: calc blocks and threads + data, + data_ptrdiff, + to_replace, + values, + replacement_ptrdiff); } static inline bool @@ -106,19 +113,21 @@ gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, const gdf_column *values) { - if (NotEqualReplacementSize(to_replace, values)) { return GDF_COLUMN_SIZE_MISMATCH; } + if (NotEqualReplacementSize(to_replace, values)) { + return GDF_COLUMN_SIZE_MISMATCH; + } if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } switch (column->dtype) { -#define WHEN(DTYPE) \ - case GDF_##DTYPE: { \ - using value_type = gdf_dtype_traits::value_type; \ - Replace(static_cast(column->data), \ - static_cast(column->size), \ - static_cast(to_replace->data), \ - static_cast(values->data), \ - static_cast(values->size)); \ +#define WHEN(DTYPE) \ + case GDF_##DTYPE: { \ + using value_type = gdf_dtype_traits::value_type; \ + Replace(static_cast(column->data), \ + static_cast(column->size), \ + static_cast(to_replace->data), \ + static_cast(values->data), \ + static_cast(values->size)); \ } break WHEN(INT8); @@ -134,7 +143,7 @@ gdf_replace(gdf_column * column, #undef WHEN case GDF_invalid: - default: return GDF_CUDA_ERROR; + default: return GDF_UNSUPPORTED_DTYPE; } return GDF_SUCCESS; From 7f80017fdf22188f51da851cd7a2f367837f7956 Mon Sep 17 00:00:00 2001 From: gcca Date: Tue, 4 Sep 2018 08:41:16 -0500 Subject: [PATCH 23/30] [replace-function] Check device attribute status --- src/replace.cu | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/replace.cu 
b/src/replace.cu index 1c4fd333..fc031397 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -51,12 +51,11 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); template __global__ void replace_kernel(T *const data, - const std::ptrdiff_t data_ptrdiff, + const std::size_t data_size, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; - i < data_ptrdiff; + for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < data_size; i += blockDim.x * gridDim.x) { const thrust::device_ptr begin(to_replace); const thrust::device_ptr end(begin + replacement_ptrdiff); @@ -72,25 +71,30 @@ replace_kernel(T *const data, } template -static inline void +static inline gdf_error Replace(T *const data, - const std::ptrdiff_t data_ptrdiff, + const std::size_t data_size, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { int multiprocessors; // TODO: device selection - cudaDeviceGetAttribute(&multiprocessors, cudaDevAttrMultiProcessorCount, 0); + const cudaError_t status = cudaDeviceGetAttribute( + &multiprocessors, cudaDevAttrMultiProcessorCount, 0); - std::size_t blocks = std::ceil(data_ptrdiff / (multiprocessors * 256.)); + if (status != cudaSuccess) { return GDF_CUDA_ERROR; } + + const std::size_t blocks = std::ceil(data_size / (multiprocessors * 256.)); replace_kernel <<>>( // TODO: calc blocks and threads data, - data_ptrdiff, + data_size, to_replace, values, replacement_ptrdiff); + + return GDF_SUCCESS; } static inline bool @@ -123,12 +127,12 @@ gdf_replace(gdf_column * column, #define WHEN(DTYPE) \ case GDF_##DTYPE: { \ using value_type = gdf_dtype_traits::value_type; \ - Replace(static_cast(column->data), \ - static_cast(column->size), \ - static_cast(to_replace->data), \ - static_cast(values->data), \ - static_cast(values->size)); \ - } break + return Replace(static_cast(column->data), \ + static_cast(column->size), \ + 
static_cast(to_replace->data), \ + static_cast(values->data), \ + static_cast(values->size)); \ + } WHEN(INT8); WHEN(INT16); @@ -145,6 +149,4 @@ gdf_replace(gdf_column * column, case GDF_invalid: default: return GDF_UNSUPPORTED_DTYPE; } - - return GDF_SUCCESS; } From 207fe0e781fe0945dc8433bd6a1459801fd9cc6e Mon Sep 17 00:00:00 2001 From: gcca Date: Tue, 4 Sep 2018 09:20:29 -0500 Subject: [PATCH 24/30] [replace-function] Move ptr construction (invariant) --- src/replace.cu | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index fc031397..7602412b 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -50,22 +50,23 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); template __global__ void -replace_kernel(T *const data, - const std::size_t data_size, - const T *const to_replace, - const T *const values, - const std::ptrdiff_t replacement_ptrdiff) { +replace_kernel(T *const data, + const std::size_t data_size, + const T *const values, + const thrust::device_ptr to_replace_begin, + const thrust::device_ptr to_replace_end) { for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < data_size; i += blockDim.x * gridDim.x) { - const thrust::device_ptr begin(to_replace); - const thrust::device_ptr end(begin + replacement_ptrdiff); + // TODO: find by map kernel + const thrust::device_ptr found_ptr = thrust::find( + thrust::device, to_replace_begin, to_replace_end, data[i]); - const thrust::device_ptr found = // TODO: find by map kernel - thrust::find(thrust::device, begin, end, data[i]); + if (found_ptr != to_replace_end) { + typename thrust::iterator_traits< + const thrust::device_ptr>::difference_type + value_found_index = thrust::distance(to_replace_begin, found_ptr); - if (found != end) { - std::size_t j = thrust::distance(begin, found); - data[i] = values[j]; + data[i] = values[value_found_index]; } } } @@ -86,13 +87,17 @@ Replace(T *const data, const std::size_t blocks = std::ceil(data_size 
/ (multiprocessors * 256.)); + const thrust::device_ptr to_replace_begin(to_replace); + const thrust::device_ptr to_replace_end(to_replace_begin + + replacement_ptrdiff); + replace_kernel <<>>( // TODO: calc blocks and threads data, data_size, - to_replace, values, - replacement_ptrdiff); + to_replace_begin, + to_replace_end); return GDF_SUCCESS; } From 87673140520421283f35fdb30954aa22295edab9 Mon Sep 17 00:00:00 2001 From: gcca Date: Wed, 5 Sep 2018 10:06:35 -0500 Subject: [PATCH 25/30] [replace-function] Add replace benchmark against cpu --- src/replace.cu | 11 +-- src/tests/replace/CMakeLists.txt | 56 +++++++++++++ src/tests/replace/replace-benchmark.cu | 110 +++++++++++++++++++++++++ src/tests/replace/replace-test.cu | 35 +------- src/tests/replace/utils.h | 64 ++++++++++++++ 5 files changed, 233 insertions(+), 43 deletions(-) create mode 100644 src/tests/replace/replace-benchmark.cu create mode 100644 src/tests/replace/utils.h diff --git a/src/replace.cu b/src/replace.cu index 7602412b..3edf5124 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -78,21 +78,14 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - int multiprocessors; - // TODO: device selection - const cudaError_t status = cudaDeviceGetAttribute( - &multiprocessors, cudaDevAttrMultiProcessorCount, 0); - - if (status != cudaSuccess) { return GDF_CUDA_ERROR; } - - const std::size_t blocks = std::ceil(data_size / (multiprocessors * 256.)); + const std::size_t blocks = std::ceil(data_size / 256.); const thrust::device_ptr to_replace_begin(to_replace); const thrust::device_ptr to_replace_end(to_replace_begin + replacement_ptrdiff); replace_kernel - <<>>( // TODO: calc blocks and threads + <<>>( // TODO: calc blocks and threads data, data_size, values, diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt index def5158d..3c9c1410 100644 --- a/src/tests/replace/CMakeLists.txt +++ 
b/src/tests/replace/CMakeLists.txt @@ -1 +1,57 @@ +#============================================================================= +# Copyright 2018 BlazingDB, Inc. +# Copyright 2018 Cristhian Alberto Gonzales Castillo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + configure_test(replace-test replace-test.cu) + +if (GDF_BENCHMARK) +include(ExternalProject) + +ExternalProject_Add(benchmark_ep + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=RELEASE + -DCMAKE_INSTALL_PREFIX=build + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.4.1 + UPDATE_COMMAND "" +) +ExternalProject_Get_property(benchmark_ep BINARY_DIR) +set(BENCHMARK_ROOT ${BINARY_DIR}/build) + +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) + +add_library(Google::Benchmark INTERFACE IMPORTED) +add_dependencies(Google::Benchmark benchmark_ep) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) + +add_library(Google::Benchmark::Main INTERFACE IMPORTED) +set_target_properties(Google::Benchmark::Main +PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) + +function(GDF_ADD_BENCHMARK TARGET) + list(REMOVE_AT ARGV 0) + cuda_add_executable(${TARGET} 
${ARGV}) + target_link_libraries(${TARGET} + Google::Benchmark Google::Benchmark::Main gdf) +endfunction() + +GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) +endif() diff --git a/src/tests/replace/replace-benchmark.cu b/src/tests/replace/replace-benchmark.cu new file mode 100644 index 00000000..3097f7e8 --- /dev/null +++ b/src/tests/replace/replace-benchmark.cu @@ -0,0 +1,110 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include +#include + +#include + +#include "utils.h" + +using T = std::int64_t; + +static void +BM_CPU_LoopReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + std::vector vector(length); + thrust::sequence(vector.begin(), vector.end(), 1); + + std::vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + + std::vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + + for (auto _ : state) { + for (std::size_t i = 0; i < vector.size(); i++) { + auto current = std::find( + to_replace_vector.begin(), to_replace_vector.end(), vector[i]); + if (current != to_replace_vector.end()) { + std::size_t j = + std::distance(to_replace_vector.begin(), current); + vector[i] = values_vector[j]; + } + } + } +} + +static void +BM_CPU_MapReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + std::vector vector(length); + thrust::sequence(vector.begin(), vector.end(), 1); + + std::vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + + std::vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + + for (auto _ : state) { + std::unordered_map map; + for (std::size_t i = 0; i < values_vector.size(); i++) { + map.insert({to_replace_vector[i], values_vector[i]}); + } + + for (std::size_t i = 0; i < vector.size(); i++) { + try { + vector[i] = map[vector[i]]; + } catch (...) 
{ continue; } + } + } +} + +static void +BM_GPU_LoopReplace(benchmark::State &state) { + const std::size_t length = state.range(0); + + thrust::device_vector device_vector(length); + thrust::sequence(device_vector.begin(), device_vector.end(), 1); + gdf_column column = MakeGdfColumn(device_vector); + + thrust::device_vector to_replace_vector(10); + thrust::sequence(to_replace_vector.begin(), to_replace_vector.end(), 1); + gdf_column to_replace = MakeGdfColumn(to_replace_vector); + + thrust::device_vector values_vector(10); + thrust::sequence(values_vector.begin(), values_vector.end(), 1); + gdf_column values = MakeGdfColumn(values_vector); + + for (auto _ : state) { + const gdf_error status = gdf_replace(&column, &to_replace, &values); + state.PauseTiming(); + if (status != GDF_SUCCESS) { state.SkipWithError("Failed replace"); } + state.ResumeTiming(); + } +} + +BENCHMARK(BM_CPU_LoopReplace)->Ranges({{8, 8 << 16}, {8, 512}}); +BENCHMARK(BM_CPU_MapReplace)->Ranges({{8, 8 << 16}, {8, 512}}); +BENCHMARK(BM_GPU_LoopReplace)->Ranges({{8, 8 << 16}, {8, 512}}); diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 3af1232d..ad98e0cf 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -17,29 +17,9 @@ #include -#include - #include -#include - -template -struct TypeTraits {}; - -#define TYPE_FACTORY(U, D) \ - template <> \ - struct TypeTraits { \ - static constexpr gdf_dtype dtype = GDF_##D; \ - } - -TYPE_FACTORY(std::int8_t, INT8); -TYPE_FACTORY(std::int16_t, INT16); -TYPE_FACTORY(std::int32_t, INT32); -TYPE_FACTORY(std::int64_t, INT64); -TYPE_FACTORY(float, FLOAT32); -TYPE_FACTORY(double, FLOAT64); - -#undef TYPE_FACTORY +#include "utils.h" template static inline thrust::device_vector @@ -49,19 +29,6 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } -template -static inline gdf_column -MakeGdfColumn(thrust::device_vector &device_vector) { - return gdf_column{ - .data = 
thrust::raw_pointer_cast(device_vector.data()), - .valid = nullptr, - .size = device_vector.size(), - .dtype = TypeTraits::dtype, - .null_count = 0, - .dtype_info = {}, - }; -} - template class ReplaceTest : public testing::Test { protected: diff --git a/src/tests/replace/utils.h b/src/tests/replace/utils.h new file mode 100644 index 00000000..2d5332f1 --- /dev/null +++ b/src/tests/replace/utils.h @@ -0,0 +1,64 @@ +/* + * Copyright 2018 BlazingDB, Inc. + * Copyright 2018 Cristhian Alberto Gonzales Castillo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +template +struct TypeTraits {}; + +#define TYPE_FACTORY(U, D) \ + template <> \ + struct TypeTraits { \ + static constexpr gdf_dtype dtype = GDF_##D; \ + } + +TYPE_FACTORY(std::int8_t, INT8); +TYPE_FACTORY(std::int16_t, INT16); +TYPE_FACTORY(std::int32_t, INT32); +TYPE_FACTORY(std::int64_t, INT64); +TYPE_FACTORY(float, FLOAT32); +TYPE_FACTORY(double, FLOAT64); + +#undef TYPE_FACTORY + +template +static inline gdf_column +MakeGdfColumn(thrust::device_vector &device_vector) { + return gdf_column{ + .data = thrust::raw_pointer_cast(device_vector.data()), + .valid = nullptr, + .size = device_vector.size(), + .dtype = TypeTraits::dtype, + .null_count = 0, + .dtype_info = {}, + }; +} + +template +static inline gdf_column +MakeGdfColumn(std::vector &vector) { + return gdf_column{ + .data = vector.data(), + .valid = nullptr, + .size = vector.size(), + .dtype = TypeTraits::dtype, + .null_count = 0, + .dtype_info = {}, + }; +} From 622abbd9bd4515ee54c503d03756e093ca82baab Mon Sep 17 00:00:00 2001 From: William Malpica Date: Fri, 28 Sep 2018 08:53:00 -0500 Subject: [PATCH 26/30] moved replace benchmark to bench folder. 
Added comments and more tests to replace-test --- CMakeLists.txt | 5 +++ src/bench/CMakeLists.txt | 43 +++++++++++++++++++ src/bench/replace/CMakeLists.txt | 22 ++++++++++ .../replace/replace-benchmark.cu | 2 +- src/tests/replace/CMakeLists.txt | 38 ---------------- src/tests/replace/replace-test.cu | 43 ++++++++++++++++++- 6 files changed, 113 insertions(+), 40 deletions(-) create mode 100644 src/bench/CMakeLists.txt create mode 100644 src/bench/replace/CMakeLists.txt rename src/{tests => bench}/replace/replace-benchmark.cu (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 14c2f6aa..b3c9437c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -208,5 +208,10 @@ if(GTEST_FOUND) else() message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") endif() + +if(GDF_BENCHMARK) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/bench) + endif() + # Print the project summary feature_summary(WHAT ALL INCLUDE_QUIET_PACKAGES FATAL_ON_MISSING_REQUIRED_PACKAGES) diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt new file mode 100644 index 00000000..9facb01f --- /dev/null +++ b/src/bench/CMakeLists.txt @@ -0,0 +1,43 @@ + +if(GDF_BENCHMARK) + + include(ExternalProject) + + ExternalProject_Add(benchmark_ep + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=RELEASE + -DCMAKE_INSTALL_PREFIX=build + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.4.1 + UPDATE_COMMAND "" + ) + ExternalProject_Get_property(benchmark_ep BINARY_DIR) + set(BENCHMARK_ROOT ${BINARY_DIR}/build) + + file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) + file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) + + add_library(Google::Benchmark INTERFACE IMPORTED) + add_dependencies(Google::Benchmark benchmark_ep) + set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) + set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_LINK_LIBRARIES 
${BENCHMARK_ROOT}/lib/libbenchmark.a) + + add_library(Google::Benchmark::Main INTERFACE IMPORTED) + set_target_properties(Google::Benchmark::Main + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) + + +function(GDF_ADD_BENCHMARK TARGET) + list(REMOVE_AT ARGV 0) + cuda_add_executable(${TARGET} ${ARGV}) + target_link_libraries(${TARGET} + Google::Benchmark Google::Benchmark::Main gdf) +endfunction() + +endif() + + + add_subdirectory(replace) \ No newline at end of file diff --git a/src/bench/replace/CMakeLists.txt b/src/bench/replace/CMakeLists.txt new file mode 100644 index 00000000..76bc607d --- /dev/null +++ b/src/bench/replace/CMakeLists.txt @@ -0,0 +1,22 @@ +#============================================================================= +# Copyright 2018 BlazingDB, Inc. +# Copyright 2018 Cristhian Alberto Gonzales Castillo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + + +if (GDF_BENCHMARK) + +GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) +endif() diff --git a/src/tests/replace/replace-benchmark.cu b/src/bench/replace/replace-benchmark.cu similarity index 98% rename from src/tests/replace/replace-benchmark.cu rename to src/bench/replace/replace-benchmark.cu index 3097f7e8..03a8e805 100644 --- a/src/tests/replace/replace-benchmark.cu +++ b/src/bench/replace/replace-benchmark.cu @@ -24,7 +24,7 @@ #include -#include "utils.h" +#include "../../tests/replace/utils.h" using T = std::int64_t; diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt index 3c9c1410..7da0c2f9 100644 --- a/src/tests/replace/CMakeLists.txt +++ b/src/tests/replace/CMakeLists.txt @@ -17,41 +17,3 @@ configure_test(replace-test replace-test.cu) -if (GDF_BENCHMARK) -include(ExternalProject) - -ExternalProject_Add(benchmark_ep - CMAKE_ARGS - -DCMAKE_BUILD_TYPE=RELEASE - -DCMAKE_INSTALL_PREFIX=build - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.4.1 - UPDATE_COMMAND "" -) -ExternalProject_Get_property(benchmark_ep BINARY_DIR) -set(BENCHMARK_ROOT ${BINARY_DIR}/build) - -file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) -file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) - -add_library(Google::Benchmark INTERFACE IMPORTED) -add_dependencies(Google::Benchmark benchmark_ep) -set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) -set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) - -add_library(Google::Benchmark::Main INTERFACE IMPORTED) -set_target_properties(Google::Benchmark::Main -PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) - -function(GDF_ADD_BENCHMARK TARGET) - list(REMOVE_AT ARGV 0) - cuda_add_executable(${TARGET} ${ARGV}) - 
target_link_libraries(${TARGET} - Google::Benchmark Google::Benchmark::Main gdf) -endfunction() - -GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) -endif() diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index ad98e0cf..253f1cbf 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -29,6 +29,9 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } + + +// This is the main teast feature template class ReplaceTest : public testing::Test { protected: @@ -65,6 +68,7 @@ using Types = testing:: TYPED_TEST_CASE(ReplaceTest, Types); +// Simple test, replacing all even values TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { thrust::device_ptr results = this->test({1, 2, 3, 4, 5, 6, 7, 8}, {2, 4, 6, 8}, {0, 2, 4, 6}); @@ -75,9 +79,11 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { EXPECT_EQ(6, results[7]); } + +// Similar test as ReplaceEvenPosition, but with unordered data TYPED_TEST(ReplaceTest, Unordered) { thrust::device_ptr results = - this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 4, 6, 8}, {0, 2, 4, 6}); + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {2, 6, 4, 8}, {0, 4, 2, 6}); EXPECT_EQ(4, results[2]); EXPECT_EQ(0, results[5]); @@ -86,8 +92,43 @@ TYPED_TEST(ReplaceTest, Unordered) { } +// Testing with Empty Replace +TYPED_TEST(ReplaceTest, EmptyReplace) { + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {}, {}); + + EXPECT_EQ(7, results[0]); + EXPECT_EQ(5, results[1]); + EXPECT_EQ(6, results[2]); + EXPECT_EQ(3, results[3]); + EXPECT_EQ(1, results[4]); + EXPECT_EQ(2, results[5]); + EXPECT_EQ(8, results[6]); + EXPECT_EQ(4, results[7]); +} + +// Testing with Nothing To Replace +TYPED_TEST(ReplaceTest, NothingToReplace) { + thrust::device_ptr results = + this->test({7, 5, 6, 3, 1, 2, 8, 4}, {10, 11, 12}, {15, 16, 17}); + + EXPECT_EQ(7, results[0]); + EXPECT_EQ(5, results[1]); + EXPECT_EQ(6, results[2]); + EXPECT_EQ(3, results[3]); + EXPECT_EQ(1, results[4]); + 
EXPECT_EQ(2, results[5]); + EXPECT_EQ(8, results[6]); + EXPECT_EQ(4, results[7]); +} + +// Testing with Empty Data +TYPED_TEST(ReplaceTest, EmptyData) { + this->test({}, {10, 11, 12}, {15, 16, 17}); +} +// Test with much larger data sets TEST(LargeScaleReplaceTest, LargeScaleReplaceTest) { { From d364fd899830d8aef59429c6021edd091cc1daf5 Mon Sep 17 00:00:00 2001 From: gcca Date: Wed, 17 Oct 2018 16:35:15 -0500 Subject: [PATCH 27/30] [replace-function] Add documentation --- src/replace.cu | 40 ++++++++++++++++++++++++------- src/tests/replace/replace-test.cu | 2 +- src/tests/replace/utils.h | 3 +++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/replace.cu b/src/replace.cu index 3edf5124..9b5f994f 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -27,6 +27,7 @@ namespace { +//! traits to get primitive type from gdf dtype template struct gdf_dtype_traits {}; @@ -48,6 +49,11 @@ DTYPE_FACTORY(TIMESTAMP, std::int64_t); #undef DTYPE_FACTORY +/// \brief Replace kernel +/// \param[in/out] data with elements to be replaced +/// \param[in] values contains the replacement values +/// \param[in] to_replace_begin begin pointer of `to_replace` array +/// \param[in] to_replace_end end pointer of `to_replace` array template __global__ void replace_kernel(T *const data, @@ -71,6 +77,12 @@ replace_kernel(T *const data, } } +/// \brief Call replace kernel according to primitive type T +/// \param[in/out] data with elements to be replaced +/// \param[in] data_size number of elements in data +/// \param[in] to_replace contains values that will be replaced +/// \param[in] values contains the replacement values +/// \param[in] replacement_ptrdiff to get the end pointer of `to_replace` array template static inline gdf_error Replace(T *const data, @@ -78,29 +90,35 @@ Replace(T *const data, const T *const to_replace, const T *const values, const std::ptrdiff_t replacement_ptrdiff) { - const std::size_t blocks = std::ceil(data_size / 256.); + const std::size_t blocks = 
std::ceil(data_size / 256.); const thrust::device_ptr to_replace_begin(to_replace); const thrust::device_ptr to_replace_end(to_replace_begin + replacement_ptrdiff); - replace_kernel - <<>>( // TODO: calc blocks and threads - data, - data_size, - values, - to_replace_begin, - to_replace_end); + replace_kernel<<>>( // TODO: calc blocks and threads + data, + data_size, + values, + to_replace_begin, + to_replace_end); return GDF_SUCCESS; } +/// \brief Check if two gdf_columns differ in size +/// \param[in] to_replace is a gdf_column +/// \param[in] values is a gdf_column static inline bool NotEqualReplacementSize(const gdf_column *to_replace, const gdf_column *values) { return to_replace->size != values->size; } +/// \brief Check if the three gdf columns have the same dtype +/// \param[in] column is a gdf_column +/// \param[in] to_replace is a gdf_column +/// \param[in] values is a gdf_column static inline bool NotSameDType(const gdf_column *column, const gdf_column *to_replace, @@ -111,6 +129,12 @@ NotSameDType(const gdf_column *column, } // namespace +/// \brief Replace `to_replace` data of `column` with `values` +/// \param[in/out] column data +/// \param[in] to_replace contains values of column that will be replaced +/// \param[in] values contains the replacement values +/// +/// Note that `to_replace` and `values` are related by the index gdf_error gdf_replace(gdf_column * column, const gdf_column *to_replace, diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index 253f1cbf..f17a5932 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -31,7 +31,7 @@ MakeDeviceVector(const std::initializer_list list) { -// This is the main teast feature +// This is the main test feature template class ReplaceTest : public testing::Test { protected: diff --git a/src/tests/replace/utils.h b/src/tests/replace/utils.h index 2d5332f1..54fa26ca 100644 --- a/src/tests/replace/utils.h +++ 
b/src/tests/replace/utils.h @@ -19,6 +19,7 @@ #include +//! traits to get gdf dtype from primitive type template struct TypeTraits {}; @@ -37,6 +38,7 @@ TYPE_FACTORY(double, FLOAT64); #undef TYPE_FACTORY +//! Convert thrust device vector to gdf_column template static inline gdf_column MakeGdfColumn(thrust::device_vector &device_vector) { @@ -50,6 +52,7 @@ MakeGdfColumn(thrust::device_vector &device_vector) { }; } +//! Convert STL vector to gdf_column template static inline gdf_column MakeGdfColumn(std::vector &vector) { From c5f27c14070036e18a1c7399bdcc59be8c560448 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 25 Oct 2018 07:01:40 -0500 Subject: [PATCH 28/30] [replace-function] Update function name --- include/gdf/cffi/functions.h | 97 ++++++++++++++++++------------------ src/replace.cu | 33 ++++++------ 2 files changed, 66 insertions(+), 64 deletions(-) diff --git a/include/gdf/cffi/functions.h b/include/gdf/cffi/functions.h index de049131..7e4d5454 100644 --- a/include/gdf/cffi/functions.h +++ b/include/gdf/cffi/functions.h @@ -1,18 +1,18 @@ #pragma once /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Start a NVTX range with predefined color. * * This function is useful only for profiling with nvvp or Nsight Systems. It * demarcates the begining of a user-defined range with a specified name and * color that will show up in the timeline view of nvvp/Nsight Systems. Can be * nested within other ranges. 
- * + * * @Param name The name of the NVTX range * @Param color The predefined gdf_color enum to use to color this range - * - * @Returns + * + * @Returns */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_push(char const * const name, gdf_color color ); @@ -21,47 +21,47 @@ gdf_error gdf_nvtx_range_push(char const * const name, gdf_color color ); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Start a NVTX range with a custom ARGB color code. * * This function is useful only for profiling with nvvp or Nsight Systems. It * demarcates the begining of a user-defined range with a specified name and * color that will show up in the timeline view of nvvp/Nsight Systems. Can be * nested within other ranges. - * + * * @Param name The name of the NVTX range * @Param color The ARGB hex color code to use to color this range (e.g., 0xFF00FF00) - * - * @Returns + * + * @Returns */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color ); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Ends the inner-most NVTX range. * * This function is useful only for profiling with nvvp or Nsight Systems. It * will demarcate the end of the inner-most range, i.e., the most recent call to * gdf_nvtx_range_push. - * - * @Returns + * + * @Returns */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_pop(); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Counts the number of valid bits in the mask that corresponds to * the specified number of rows. 
- * + * * @Param[in] masks Array of gdf_valid_types with enough bits to represent * num_rows number of rows * @Param[in] num_rows The number of rows represented in the bit-validity mask. * @Param[out] count The number of valid rows in the mask - * - * @Returns GDF_SUCCESS upon successful completion. + * + * @Returns GDF_SUCCESS upon successful completion. */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_count_nonzero_mask(gdf_valid_type const * masks, int num_rows, int * count); @@ -79,15 +79,15 @@ gdf_error gdf_column_view_augmented(gdf_column *column, void *data, gdf_valid_ty gdf_error gdf_column_free(gdf_column *column); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Concatenates the gdf_columns into a single, contiguous column, * including the validity bitmasks - * - * @Param[out] output A column whose buffers are already allocated that will + * + * @Param[out] output A column whose buffers are already allocated that will * @Param[in] columns_to_conat[] The columns to concatenate * @Param[in] num_columns The number of columns to concatenate * contain the concatenation of the input columns - * + * * @Returns GDF_SUCCESS upon successful completion */ /* ----------------------------------------------------------------------------*/ @@ -198,10 +198,10 @@ gdf_error gdf_segmented_radixsort_generic(gdf_segmented_radixsort_plan_type *hdl /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Performs an inner join on the specified columns of two * dataframes (left, right) - * + * * @Param[in] left_cols[] The columns of the left dataframe * @Param[in] num_left_cols The number of columns in the left dataframe * @Param[in] left_join_cols[] The column indices of columns from the left dataframe @@ -218,13 +218,13 @@ gdf_error gdf_segmented_radixsort_generic(gdf_segmented_radixsort_plan_type *hdl * @Param[out] gdf_column * 
right_indices If not nullptr, indices of rows from the right table that match rows in the left table * @Param[in] join_context The context to use to control how the join is performed,e.g., * sort vs hash based implementation - * + * * @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate * error code */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_inner_join( - gdf_column **left_cols, + gdf_column **left_cols, int num_left_cols, int left_join_cols[], gdf_column **right_cols, @@ -238,10 +238,10 @@ gdf_error gdf_inner_join( gdf_context *join_context); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Performs a left join (also known as left outer join) on the * specified columns of two dataframes (left, right) - * + * * @Param[in] left_cols[] The columns of the left dataframe * @Param[in] num_left_cols The number of columns in the left dataframe * @Param[in] left_join_cols[] The column indices of columns from the left dataframe @@ -258,13 +258,13 @@ gdf_error gdf_inner_join( * @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table * @Param[in] join_context The context to use to control how the join is performed,e.g., * sort vs hash based implementation - * + * * @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate * error code */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_left_join( - gdf_column **left_cols, + gdf_column **left_cols, int num_left_cols, int left_join_cols[], gdf_column **right_cols, @@ -278,10 +278,10 @@ gdf_error gdf_left_join( gdf_context *join_context); /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Performs a full join (also known as full outer join) on the * specified columns of two dataframes (left, right) - 
* + * * @Param[in] left_cols[] The columns of the left dataframe * @Param[in] num_left_cols The number of columns in the left dataframe * @Param[in] left_join_cols[] The column indices of columns from the left dataframe @@ -298,13 +298,13 @@ gdf_error gdf_left_join( * @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table * @Param[in] join_context The context to use to control how the join is performed,e.g., * sort vs hash based implementation - * + * * @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate * error code */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_full_join( - gdf_column **left_cols, + gdf_column **left_cols, int num_left_cols, int left_join_cols[], gdf_column **right_cols, @@ -320,32 +320,32 @@ gdf_error gdf_full_join( /* partioning */ /* --------------------------------------------------------------------------*/ -/** - * @brief Computes the hash values of the rows in the specified columns of the - * input columns and bins the hash values into the desired number of partitions. - * Rearranges the input columns such that rows with hash values in the same bin +/** + * @brief Computes the hash values of the rows in the specified columns of the + * input columns and bins the hash values into the desired number of partitions. + * Rearranges the input columns such that rows with hash values in the same bin * are contiguous. 
- * + * * @Param[in] num_input_cols The number of columns in the input columns * @Param[in] input[] The input set of columns * @Param[in] columns_to_hash[] Indices of the columns in the input set to hash * @Param[in] num_cols_to_hash The number of columns to hash * @Param[in] num_partitions The number of partitions to rearrange the input rows into - * @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement + * @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement * of the input columns into the desired number of partitions * @Param[out] partition_offsets Preallocated array the size of the number of * partitions. Where partition_offsets[i] indicates the starting position * of partition 'i' * @Param[in] hash The hash function to use - * + * * @Returns If the operation was successful, returns GDF_SUCCESS */ /* ----------------------------------------------------------------------------*/ -gdf_error gdf_hash_partition(int num_input_cols, - gdf_column * input[], +gdf_error gdf_hash_partition(int num_input_cols, + gdf_column * input[], int columns_to_hash[], int num_cols_to_hash, - int num_partitions, + int num_partitions, gdf_column * partitioned_output[], int partition_offsets[], gdf_hash_func hash); @@ -363,14 +363,14 @@ gdf_error gdf_prefixsum_i64(gdf_column *inp, gdf_column *out, int inclusive); /* hashing */ /* --------------------------------------------------------------------------*/ -/** +/** * @Synopsis Computes the hash value of each row in the input set of columns. 
- * + * * @Param num_cols The number of columns in the input set * @Param input The list of columns whose rows will be hashed * @Param hash The hash function to use * @Param output The hash value of each row of the input - * + * * @Returns GDF_SUCCESS if the operation was successful, otherwise an appropriate * error code */ @@ -786,12 +786,13 @@ gdf_error gdf_quantile_aprrox( gdf_column* col_in, //input column; /* replace */ -/// \brief Replace `to_replace` data of `column` with `values` +/// \brief For each value in `to_replace`, find all instances of that value +/// in `column` and replace it with the corresponding value in `values`. /// \param[in/out] column data /// \param[in] to_replace contains values of column that will be replaced /// \param[in] values contains the replacement values /// /// Note that `to_replace` and `values` are related by the index -gdf_error gdf_replace(gdf_column * column, - const gdf_column *to_replace, - const gdf_column *values); +gdf_error gdf_find_and_replace_all(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values); diff --git a/src/replace.cu b/src/replace.cu index 9b5f994f..bfe827a5 100644 --- a/src/replace.cu +++ b/src/replace.cu @@ -129,16 +129,17 @@ NotSameDType(const gdf_column *column, } // namespace -/// \brief Replace `to_replace` data of `column` with `values` +/// \brief For each value in `to_replace`, find all instances of that value +/// in `column` and replace it with the corresponding value in `values`. 
/// \param[in/out] column data /// \param[in] to_replace contains values of column that will be replaced /// \param[in] values contains the replacement values /// /// Note that `to_replace` and `values` are related by the index gdf_error -gdf_replace(gdf_column * column, - const gdf_column *to_replace, - const gdf_column *values) { +gdf_find_and_replace_all(gdf_column * column, + const gdf_column *to_replace, + const gdf_column *values) { if (NotEqualReplacementSize(to_replace, values)) { return GDF_COLUMN_SIZE_MISMATCH; } @@ -146,7 +147,7 @@ gdf_replace(gdf_column * column, if (NotSameDType(column, to_replace, values)) { return GDF_CUDA_ERROR; } switch (column->dtype) { -#define WHEN(DTYPE) \ +#define REPLACE_CASE(DTYPE) \ case GDF_##DTYPE: { \ using value_type = gdf_dtype_traits::value_type; \ return Replace(static_cast(column->data), \ @@ -156,17 +157,17 @@ gdf_replace(gdf_column * column, static_cast(values->size)); \ } - WHEN(INT8); - WHEN(INT16); - WHEN(INT32); - WHEN(INT64); - WHEN(FLOAT32); - WHEN(FLOAT64); - WHEN(DATE32); - WHEN(DATE64); - WHEN(TIMESTAMP); - -#undef WHEN + REPLACE_CASE(INT8); + REPLACE_CASE(INT16); + REPLACE_CASE(INT32); + REPLACE_CASE(INT64); + REPLACE_CASE(FLOAT32); + REPLACE_CASE(FLOAT64); + REPLACE_CASE(DATE32); + REPLACE_CASE(DATE64); + REPLACE_CASE(TIMESTAMP); + +#undef REPLACE_CASE case GDF_invalid: default: return GDF_UNSUPPORTED_DTYPE; From b8e7dddb6611efafd0a19708c74a2d5030adf6d1 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 25 Oct 2018 07:04:45 -0500 Subject: [PATCH 29/30] [replace-function] Update name for test --- src/tests/replace/CMakeLists.txt | 1 - src/tests/replace/replace-test.cu | 107 ++++++++++++++---------------- 2 files changed, 51 insertions(+), 57 deletions(-) diff --git a/src/tests/replace/CMakeLists.txt b/src/tests/replace/CMakeLists.txt index 7da0c2f9..bdf3c5d2 100644 --- a/src/tests/replace/CMakeLists.txt +++ b/src/tests/replace/CMakeLists.txt @@ -16,4 +16,3 @@ 
#============================================================================= configure_test(replace-test replace-test.cu) - diff --git a/src/tests/replace/replace-test.cu b/src/tests/replace/replace-test.cu index f17a5932..c6ec346a 100644 --- a/src/tests/replace/replace-test.cu +++ b/src/tests/replace/replace-test.cu @@ -29,8 +29,6 @@ MakeDeviceVector(const std::initializer_list list) { return device_data; } - - // This is the main test feature template class ReplaceTest : public testing::Test { @@ -47,7 +45,8 @@ protected: to_replace = MakeGdfColumn(to_replace_data); values = MakeGdfColumn(values_data); - const gdf_error status = gdf_replace(&column, &to_replace, &values); + const gdf_error status = + gdf_find_and_replace_all(&column, &to_replace, &values); EXPECT_EQ(GDF_SUCCESS, status); @@ -79,7 +78,6 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition) { EXPECT_EQ(6, results[7]); } - // Similar test as ReplaceEvenPosition, but with unordered data TYPED_TEST(ReplaceTest, Unordered) { thrust::device_ptr results = @@ -91,7 +89,6 @@ TYPED_TEST(ReplaceTest, Unordered) { EXPECT_EQ(2, results[7]); } - // Testing with Empty Replace TYPED_TEST(ReplaceTest, EmptyReplace) { thrust::device_ptr results = @@ -127,58 +124,56 @@ TYPED_TEST(ReplaceTest, EmptyData) { this->test({}, {10, 11, 12}, {15, 16, 17}); } - // Test with much larger data sets TEST(LargeScaleReplaceTest, LargeScaleReplaceTest) { + const int DATA_SIZE = 1000000; + const int REPLACE_SIZE = 10000; + + srand((unsigned) time(NULL)); - { - const int DATA_SIZE = 1000000; - const int REPLACE_SIZE = 10000; - - srand((unsigned)time(NULL)); - - std::vector column_data(DATA_SIZE); - for (int i = 0; i < DATA_SIZE; i++){ - column_data[i] = rand() % (2*REPLACE_SIZE); - } - - std::vector from(DATA_SIZE); - std::vector to(DATA_SIZE); - int count = 0; - for (int i = 0; i < 7; i++){ - for (int j = 0; j < REPLACE_SIZE; j += 7){ - from[i+j] = count; - count++; - to[i+j] = count; - } - } - - thrust::device_vector 
device_data(column_data); - gdf_column data_gdf = MakeGdfColumn(device_data); - thrust::device_vector device_from(from); - gdf_column from_gdf = MakeGdfColumn(device_from); - thrust::device_vector device_to(to); - gdf_column to_gdf = MakeGdfColumn(device_to); - - const gdf_error status = gdf_replace(&data_gdf, &from_gdf, &to_gdf); - - EXPECT_EQ(GDF_SUCCESS, status); - - std::vector replaced_data(DATA_SIZE); - thrust::copy(device_data.begin(), device_data.end(), replaced_data.begin()); - - count = 0; - for (int i = 0; i < DATA_SIZE; i++){ - if (column_data[i] < REPLACE_SIZE){ - EXPECT_EQ(column_data[i] + 1, replaced_data[i]); - if (column_data[i] + 1 != replaced_data[i]){ - std::cout<<"failed at "< 20){ - break; - } - } - } - } - } + std::vector column_data(DATA_SIZE); + for (int i = 0; i < DATA_SIZE; i++) { + column_data[i] = rand() % (2 * REPLACE_SIZE); + } + + std::vector from(DATA_SIZE); + std::vector to(DATA_SIZE); + int count = 0; + for (int i = 0; i < 7; i++) { + for (int j = 0; j < REPLACE_SIZE; j += 7) { + from[i + j] = count; + count++; + to[i + j] = count; + } + } + + thrust::device_vector device_data(column_data); + gdf_column data_gdf = MakeGdfColumn(device_data); + thrust::device_vector device_from(from); + gdf_column from_gdf = MakeGdfColumn(device_from); + thrust::device_vector device_to(to); + gdf_column to_gdf = MakeGdfColumn(device_to); + + const gdf_error status = + gdf_find_and_replace_all(&data_gdf, &from_gdf, &to_gdf); + + EXPECT_EQ(GDF_SUCCESS, status); + + std::vector replaced_data(DATA_SIZE); + thrust::copy(device_data.begin(), device_data.end(), replaced_data.begin()); + + count = 0; + for (int i = 0; i < DATA_SIZE; i++) { + if (column_data[i] < REPLACE_SIZE) { + EXPECT_EQ(column_data[i] + 1, replaced_data[i]); + if (column_data[i] + 1 != replaced_data[i]) { + std::cout << "failed at " << i + << " column_data[i]: " << column_data[i] + << " replaced_data[i]: " << replaced_data[i] + << std::endl; + count++; + if (count > 20) { break; } + } + 
} + } } From fdb7afc63169ed13a7b861a92b9d86b5df8aec45 Mon Sep 17 00:00:00 2001 From: gcca Date: Thu, 25 Oct 2018 07:10:25 -0500 Subject: [PATCH 30/30] [replace-function] Update function for benchmark --- src/bench/CMakeLists.txt | 74 +++++++++++++++----------- src/bench/replace/CMakeLists.txt | 4 -- src/bench/replace/replace-benchmark.cu | 3 +- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt index 9facb01f..adff8993 100644 --- a/src/bench/CMakeLists.txt +++ b/src/bench/CMakeLists.txt @@ -1,43 +1,57 @@ +#============================================================================= +# Copyright 2018 BlazingDB, Inc. +# Copyright 2018 Cristhian Alberto Gonzales Castillo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= if(GDF_BENCHMARK) - include(ExternalProject) +include(ExternalProject) - ExternalProject_Add(benchmark_ep - CMAKE_ARGS - -DCMAKE_BUILD_TYPE=RELEASE - -DCMAKE_INSTALL_PREFIX=build - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.4.1 - UPDATE_COMMAND "" - ) - ExternalProject_Get_property(benchmark_ep BINARY_DIR) - set(BENCHMARK_ROOT ${BINARY_DIR}/build) +ExternalProject_Add(benchmark_ep + CMAKE_ARGS + -DCMAKE_BUILD_TYPE=RELEASE + -DCMAKE_INSTALL_PREFIX=build + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.4.1 + UPDATE_COMMAND "" +) +ExternalProject_Get_property(benchmark_ep BINARY_DIR) +set(BENCHMARK_ROOT ${BINARY_DIR}/build) - file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) - file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/include) +file(MAKE_DIRECTORY ${BENCHMARK_ROOT}/lib) - add_library(Google::Benchmark INTERFACE IMPORTED) - add_dependencies(Google::Benchmark benchmark_ep) - set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) - set_target_properties(Google::Benchmark - PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) +add_library(Google::Benchmark INTERFACE IMPORTED) +add_dependencies(Google::Benchmark benchmark_ep) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${BENCHMARK_ROOT}/include) +set_target_properties(Google::Benchmark + PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark.a) - add_library(Google::Benchmark::Main INTERFACE IMPORTED) - set_target_properties(Google::Benchmark::Main - PROPERTIES INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) +add_library(Google::Benchmark::Main INTERFACE IMPORTED) +set_target_properties(Google::Benchmark::Main + PROPERTIES 
INTERFACE_LINK_LIBRARIES ${BENCHMARK_ROOT}/lib/libbenchmark_main.a) function(GDF_ADD_BENCHMARK TARGET) - list(REMOVE_AT ARGV 0) - cuda_add_executable(${TARGET} ${ARGV}) - target_link_libraries(${TARGET} - Google::Benchmark Google::Benchmark::Main gdf) + list(REMOVE_AT ARGV 0) + cuda_add_executable(${TARGET} ${ARGV}) + target_link_libraries(${TARGET} Google::Benchmark Google::Benchmark::Main gdf) endfunction() -endif() - - add_subdirectory(replace) \ No newline at end of file +add_subdirectory(replace) +endif() diff --git a/src/bench/replace/CMakeLists.txt b/src/bench/replace/CMakeLists.txt index 76bc607d..4d7296fc 100644 --- a/src/bench/replace/CMakeLists.txt +++ b/src/bench/replace/CMakeLists.txt @@ -15,8 +15,4 @@ # limitations under the License. #============================================================================= - -if (GDF_BENCHMARK) - GDF_ADD_BENCHMARK(replace-benchmark replace-benchmark.cu) -endif() diff --git a/src/bench/replace/replace-benchmark.cu b/src/bench/replace/replace-benchmark.cu index 03a8e805..a2001d44 100644 --- a/src/bench/replace/replace-benchmark.cu +++ b/src/bench/replace/replace-benchmark.cu @@ -98,7 +98,8 @@ BM_GPU_LoopReplace(benchmark::State &state) { gdf_column values = MakeGdfColumn(values_vector); for (auto _ : state) { - const gdf_error status = gdf_replace(&column, &to_replace, &values); + const gdf_error status = + gdf_find_and_replace_all(&column, &to_replace, &values); state.PauseTiming(); if (status != GDF_SUCCESS) { state.SkipWithError("Failed replace"); } state.ResumeTiming();