Skip to content
This repository has been archived by the owner on Dec 21, 2018. It is now read-only.

[REVIEW] Replace function #106

Open
wants to merge 36 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
350dafc
[replace-function] API definition
gcca Aug 22, 2018
8f805d9
[replace-function] Merged from master
gcca Aug 23, 2018
5a8b93a
[replace-function] Add first implementation
gcca Aug 23, 2018
adf537d
[replace-function] Add replacement by lower bound
gcca Aug 23, 2018
3214b08
[replace-function] Update typed unit test
gcca Aug 24, 2018
3935048
[replace-function] Update to replace on unordered column
gcca Aug 30, 2018
a4a7383
[replace-function] Merged from master
gcca Aug 30, 2018
f5afc31
[replace-function] Update class replace functor
gcca Aug 30, 2018
ef3f382
[replace-function] Common test fixtures
gcca Aug 30, 2018
003bd9d
created larger scale test for replace funtion
Aug 31, 2018
5553c67
[replace-function] Replace kernel
gcca Sep 3, 2018
0f3891d
[replace-function] Check device attribute status
gcca Sep 4, 2018
26c522a
[replace-function] Move ptr construction (invariant)
gcca Sep 4, 2018
2a7a023
[replace-function] Add replace benchmark against cpu
gcca Sep 5, 2018
200da31
Merge branch 'master' into replace-function
Sep 27, 2018
a2fa767
moved replace benchmark to bench folder. Added comments and more test…
Sep 28, 2018
cc3beca
[replace-function] API definition
gcca Aug 22, 2018
ef4de00
[replace-function] Add first implementation
gcca Aug 23, 2018
0b62cd9
[replace-function] Add replacement by lower bound
gcca Aug 23, 2018
5f2c338
[replace-function] Update typed unit test
gcca Aug 24, 2018
35c765d
[replace-function] Update to replace on unordered column
gcca Aug 30, 2018
d3e50d3
[replace-function] Update class replace functor
gcca Aug 30, 2018
7c12b1a
[replace-function] Common test fixtures
gcca Aug 30, 2018
c01252b
created larger scale test for replace funtion
Aug 31, 2018
321656e
[replace-function] Replace kernel
gcca Sep 3, 2018
7f80017
[replace-function] Check device attribute status
gcca Sep 4, 2018
207fe0e
[replace-function] Move ptr construction (invariant)
gcca Sep 4, 2018
8767314
[replace-function] Add replace benchmark against cpu
gcca Sep 5, 2018
622abbd
moved replace benchmark to bench folder. Added comments and more test…
Sep 28, 2018
d364fd8
[replace-function] Add documentation
gcca Oct 17, 2018
95a8a21
[replace-function] Merged from remote
gcca Oct 17, 2018
99f6ebf
Merge branch 'master' into replace-function
gcca Oct 25, 2018
c5f27c1
[replace-function] Update function name
gcca Oct 25, 2018
b8e7ddd
[replace-function] Update name for test
gcca Oct 25, 2018
fdb7afc
[replace-function] Update function for benchmark
gcca Oct 25, 2018
41b058c
Merge upstream 'master' into replace-function
gcca Oct 26, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions libgdf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ cuda_add_library(gdf SHARED
src/sqls_ops.cu
src/streamcompactionops.cu
src/unaryops.cu
src/replace.cu
#src/windowedops.cu
src/quantiles.cu
src/io/csv/csv-reader.cu
Expand Down Expand Up @@ -220,5 +221,10 @@ if(GTEST_FOUND)
else()
message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.")
endif()

# Benchmarks are opt-in: the bench subdirectory is only configured when
# GDF_BENCHMARK evaluates to true (e.g. -DGDF_BENCHMARK=ON at configure time).
if(GDF_BENCHMARK)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/bench)
endif()

# Print the project summary
feature_summary(WHAT ALL INCLUDE_QUIET_PACKAGES FATAL_ON_MISSING_REQUIRED_PACKAGES)
103 changes: 58 additions & 45 deletions libgdf/include/gdf/cffi/functions.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#pragma once

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Start a NVTX range with predefined color.
*
* This function is useful only for profiling with nvvp or Nsight Systems. It
* demarcates the beginning of a user-defined range with a specified name and
* color that will show up in the timeline view of nvvp/Nsight Systems. Can be
* nested within other ranges.
*
*
* @Param name The name of the NVTX range
* @Param color The predefined gdf_color enum to use to color this range
*
* @Returns
*
* @Returns
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_nvtx_range_push(char const * const name, gdf_color color );
Expand All @@ -21,47 +21,47 @@ gdf_error gdf_nvtx_range_push(char const * const name, gdf_color color );


/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Start a NVTX range with a custom ARGB color code.
*
* This function is useful only for profiling with nvvp or Nsight Systems. It
* demarcates the beginning of a user-defined range with a specified name and
* color that will show up in the timeline view of nvvp/Nsight Systems. Can be
* nested within other ranges.
*
*
* @Param name The name of the NVTX range
* @Param color The ARGB hex color code to use to color this range (e.g., 0xFF00FF00)
*
* @Returns
*
* @Returns
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color );


/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Ends the inner-most NVTX range.
*
* This function is useful only for profiling with nvvp or Nsight Systems. It
* will demarcate the end of the inner-most range, i.e., the most recent call to
* gdf_nvtx_range_push.
*
* @Returns
*
* @Returns
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_nvtx_range_pop();

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Counts the number of valid bits in the mask that corresponds to
* the specified number of rows.
*
*
* @Param[in] masks Array of gdf_valid_types with enough bits to represent
* num_rows number of rows
* @Param[in] num_rows The number of rows represented in the bit-validity mask.
* @Param[out] count The number of valid rows in the mask
*
* @Returns GDF_SUCCESS upon successful completion.
*
* @Returns GDF_SUCCESS upon successful completion.
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_count_nonzero_mask(gdf_valid_type const * masks, int num_rows, int * count);
Expand All @@ -79,15 +79,15 @@ gdf_error gdf_column_view_augmented(gdf_column *column, void *data, gdf_valid_ty
gdf_error gdf_column_free(gdf_column *column);

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Concatenates the gdf_columns into a single, contiguous column,
* including the validity bitmasks
*
* @Param[out] output A column whose buffers are already allocated that will
*
* @Param[out] output A column whose buffers are already allocated that will
* @Param[in] columns_to_conat[] The columns to concatenate
* @Param[in] num_columns The number of columns to concatenate
* contain the concatenation of the input columns
*
*
* @Returns GDF_SUCCESS upon successful completion
*/
/* ----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -198,10 +198,10 @@ gdf_error gdf_segmented_radixsort_generic(gdf_segmented_radixsort_plan_type *hdl


/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Performs an inner join on the specified columns of two
* dataframes (left, right)
*
*
* @Param[in] left_cols[] The columns of the left dataframe
* @Param[in] num_left_cols The number of columns in the left dataframe
* @Param[in] left_join_cols[] The column indices of columns from the left dataframe
Expand All @@ -218,13 +218,13 @@ gdf_error gdf_segmented_radixsort_generic(gdf_segmented_radixsort_plan_type *hdl
* @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table
* @Param[in] join_context The context to use to control how the join is performed,e.g.,
* sort vs hash based implementation
*
*
* @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate
* error code
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_inner_join(
gdf_column **left_cols,
gdf_column **left_cols,
int num_left_cols,
int left_join_cols[],
gdf_column **right_cols,
Expand All @@ -238,10 +238,10 @@ gdf_error gdf_inner_join(
gdf_context *join_context);

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Performs a left join (also known as left outer join) on the
* specified columns of two dataframes (left, right)
*
*
* @Param[in] left_cols[] The columns of the left dataframe
* @Param[in] num_left_cols The number of columns in the left dataframe
* @Param[in] left_join_cols[] The column indices of columns from the left dataframe
Expand All @@ -258,13 +258,13 @@ gdf_error gdf_inner_join(
* @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table
* @Param[in] join_context The context to use to control how the join is performed,e.g.,
* sort vs hash based implementation
*
*
* @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate
* error code
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_left_join(
gdf_column **left_cols,
gdf_column **left_cols,
int num_left_cols,
int left_join_cols[],
gdf_column **right_cols,
Expand All @@ -278,10 +278,10 @@ gdf_error gdf_left_join(
gdf_context *join_context);

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Performs a full join (also known as full outer join) on the
* specified columns of two dataframes (left, right)
*
*
* @Param[in] left_cols[] The columns of the left dataframe
* @Param[in] num_left_cols The number of columns in the left dataframe
* @Param[in] left_join_cols[] The column indices of columns from the left dataframe
Expand All @@ -298,13 +298,13 @@ gdf_error gdf_left_join(
* @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table
* @Param[in] join_context The context to use to control how the join is performed,e.g.,
* sort vs hash based implementation
*
*
* @Returns GDF_SUCCESS if the join operation was successful, otherwise an appropriate
* error code
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_full_join(
gdf_column **left_cols,
gdf_column **left_cols,
int num_left_cols,
int left_join_cols[],
gdf_column **right_cols,
Expand All @@ -320,32 +320,32 @@ gdf_error gdf_full_join(
/* partitioning */

/* --------------------------------------------------------------------------*/
/**
* @brief Computes the hash values of the rows in the specified columns of the
* input columns and bins the hash values into the desired number of partitions.
* Rearranges the input columns such that rows with hash values in the same bin
/**
* @brief Computes the hash values of the rows in the specified columns of the
* input columns and bins the hash values into the desired number of partitions.
* Rearranges the input columns such that rows with hash values in the same bin
* are contiguous.
*
*
* @Param[in] num_input_cols The number of columns in the input columns
* @Param[in] input[] The input set of columns
* @Param[in] columns_to_hash[] Indices of the columns in the input set to hash
* @Param[in] num_cols_to_hash The number of columns to hash
* @Param[in] num_partitions The number of partitions to rearrange the input rows into
* @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement
* @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement
* of the input columns into the desired number of partitions
* @Param[out] partition_offsets Preallocated array the size of the number of
* partitions. Where partition_offsets[i] indicates the starting position
* of partition 'i'
* @Param[in] hash The hash function to use
*
*
* @Returns If the operation was successful, returns GDF_SUCCESS
*/
/* ----------------------------------------------------------------------------*/
gdf_error gdf_hash_partition(int num_input_cols,
gdf_column * input[],
gdf_error gdf_hash_partition(int num_input_cols,
gdf_column * input[],
int columns_to_hash[],
int num_cols_to_hash,
int num_partitions,
int num_partitions,
gdf_column * partitioned_output[],
int partition_offsets[],
gdf_hash_func hash);
Expand All @@ -363,14 +363,14 @@ gdf_error gdf_prefixsum_i64(gdf_column *inp, gdf_column *out, int inclusive);
/* hashing */

/* --------------------------------------------------------------------------*/
/**
/**
* @Synopsis Computes the hash value of each row in the input set of columns.
*
*
* @Param num_cols The number of columns in the input set
* @Param input The list of columns whose rows will be hashed
* @Param hash The hash function to use
* @Param output The hash value of each row of the input
*
*
* @Returns GDF_SUCCESS if the operation was successful, otherwise an appropriate
* error code
*/
Expand Down Expand Up @@ -702,7 +702,7 @@ gdf_error gpu_hash_columns(gdf_column ** columns_to_hash, int num_columns, gdf_c

gdf_error get_column_byte_width(gdf_column * col, int * width);

/*
/*
Multi-Column SQL ops:
WHERE (Filtering)
ORDER-BY
Expand Down Expand Up @@ -783,3 +783,16 @@ gdf_error gdf_quantile_aprrox( gdf_column* col_in, //input column;
double q, //requested quantile in [0,1]
void* t_erased_res, //type-erased result of same type as column;
gdf_context* ctxt); //context info

/* replace */

/// \brief For each value in `to_replace`, find all instances of that value
/// in `column` and replace them with the corresponding value in `values`.
///
/// `to_replace` and `values` are paired by index: occurrences of
/// `to_replace[i]` in `column` are replaced with `values[i]`.
///
/// \param[in,out] column     column whose data is searched and modified
/// \param[in]     to_replace contains values of `column` that will be replaced
/// \param[in]     values     contains the replacement values
///
/// \return a `gdf_error` status code
gdf_error gdf_find_and_replace_all(gdf_column * column,
const gdf_column *to_replace,
const gdf_column *values);
1 change: 1 addition & 0 deletions libgdf/src/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ add_subdirectory(datetime)
add_subdirectory(hashing)
add_subdirectory(join)
add_subdirectory(sqls)
add_subdirectory(replace)
add_subdirectory(hash_map)
add_subdirectory(groupby)
add_subdirectory(unaryops)
Expand Down
1 change: 0 additions & 1 deletion libgdf/thirdparty/cnmem
Submodule cnmem deleted from 28a182
1 change: 0 additions & 1 deletion libgdf/thirdparty/cub
Submodule cub deleted from b165e1
1 change: 0 additions & 1 deletion libgdf/thirdparty/moderngpu
Submodule moderngpu deleted from c1fd31
57 changes: 57 additions & 0 deletions src/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#=============================================================================
# Copyright 2018 BlazingDB, Inc.
# Copyright 2018 Cristhian Alberto Gonzales Castillo <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're adding google benchmark in this PR? Shouldn't that be orthogonal to gdf_replace, and therefore a separate PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is the first benchmark I've seen added to libgdf...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since no other PR depends on this, I put Google Benchmark here. But I could open a separate PR if that is preferable.

if(GDF_BENCHMARK)

include(ExternalProject)

# Download, build and install Google Benchmark v1.4.1 as an external project.
#
# The install prefix is given as an absolute path (<BINARY_DIR>/build) instead
# of the bare relative "build" so it does not depend on the working directory
# of the install step; it is the same location BENCHMARK_ROOT points to below.
#
# BUILD_BYPRODUCTS declares the static archives that are later linked by full
# path, so generators that need an explicit producer for every file they
# consume (e.g. Ninja) know that benchmark_ep creates them.
ExternalProject_Add(benchmark_ep
  CMAKE_ARGS
    -DCMAKE_BUILD_TYPE=RELEASE
    -DCMAKE_INSTALL_PREFIX=<BINARY_DIR>/build
    -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
  GIT_REPOSITORY https://github.com/google/benchmark.git
  GIT_TAG v1.4.1
  UPDATE_COMMAND ""
  BUILD_BYPRODUCTS
    <BINARY_DIR>/build/lib/libbenchmark.a
    <BINARY_DIR>/build/lib/libbenchmark_main.a
)
# Expose the external project's binary directory as BINARY_DIR and derive the
# install root that the imported targets refer to.
ExternalProject_Get_Property(benchmark_ep BINARY_DIR)
set(BENCHMARK_ROOT ${BINARY_DIR}/build)

# The install tree is only populated at build time, but the include directory
# of an imported target has to exist when CMake generates the build system,
# so create the directories up front.
file(MAKE_DIRECTORY "${BENCHMARK_ROOT}/include")
file(MAKE_DIRECTORY "${BENCHMARK_ROOT}/lib")

# libbenchmark needs the platform thread library at link time.
find_package(Threads REQUIRED)

# Google::Benchmark: headers + libbenchmark.a produced by benchmark_ep.
add_library(Google::Benchmark INTERFACE IMPORTED)
add_dependencies(Google::Benchmark benchmark_ep)
set_target_properties(Google::Benchmark PROPERTIES
  INTERFACE_INCLUDE_DIRECTORIES "${BENCHMARK_ROOT}/include"
  INTERFACE_LINK_LIBRARIES "${BENCHMARK_ROOT}/lib/libbenchmark.a;Threads::Threads")

# Google::Benchmark::Main: libbenchmark_main.a. It is declared as depending on
# Google::Benchmark so that libbenchmark.a is placed after it on the link line
# regardless of the order in which consumers list the two targets, and it
# carries its own build-order dependency on benchmark_ep.
add_library(Google::Benchmark::Main INTERFACE IMPORTED)
add_dependencies(Google::Benchmark::Main benchmark_ep)
set_target_properties(Google::Benchmark::Main PROPERTIES
  INTERFACE_LINK_LIBRARIES "${BENCHMARK_ROOT}/lib/libbenchmark_main.a;Google::Benchmark")


# GDF_ADD_BENCHMARK(<target> <source>...)
#
# Creates the CUDA executable <target> from the listed sources and links it
# against the Google::Benchmark, Google::Benchmark::Main and gdf targets.
function(GDF_ADD_BENCHMARK TARGET)
  # ARGN holds every argument that follows TARGET, i.e. the source files.
  cuda_add_executable(${TARGET} ${ARGN})
  target_link_libraries(${TARGET}
    Google::Benchmark
    Google::Benchmark::Main
    gdf)
endfunction()


add_subdirectory(replace)
endif()
18 changes: 18 additions & 0 deletions src/bench/replace/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#=============================================================================
# Copyright 2018 BlazingDB, Inc.
# Copyright 2018 Cristhian Alberto Gonzales Castillo <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================

# Register the replace-benchmark executable, built from replace-benchmark.cu.
gdf_add_benchmark(replace-benchmark
  replace-benchmark.cu)
Loading