Skip to content

Commit 7968834

Browse files
committed
Merge branch 'bug-write_orc-multiblock-sf' of https://github.com/vuule/cudf into bug-write_orc-multiblock-sf
2 parents b099c07 + 7a32dbb commit 7968834

File tree

101 files changed

+2517
-3741
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+2517
-3741
lines changed

.github/workflows/build.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
arch: "amd64"
6363
branch: ${{ inputs.branch }}
6464
build_type: ${{ inputs.build_type || 'branch' }}
65-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
65+
container_image: "rapidsai/ci-conda:latest"
6666
date: ${{ inputs.date }}
6767
node_type: "gpu-v100-latest-1"
6868
run_script: "ci/build_docs.sh"

.github/workflows/pr.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ jobs:
186186
build_type: pull-request
187187
node_type: "gpu-v100-latest-1"
188188
arch: "amd64"
189-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
189+
container_image: "rapidsai/ci-conda:latest"
190190
run_script: "ci/test_java.sh"
191191
static-configure:
192192
needs: checks
@@ -207,7 +207,7 @@ jobs:
207207
build_type: pull-request
208208
node_type: "gpu-v100-latest-1"
209209
arch: "amd64"
210-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
210+
container_image: "rapidsai/ci-conda:latest"
211211
run_script: "ci/test_notebooks.sh"
212212
docs-build:
213213
needs: conda-python-build
@@ -217,7 +217,7 @@ jobs:
217217
build_type: pull-request
218218
node_type: "gpu-v100-latest-1"
219219
arch: "amd64"
220-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
220+
container_image: "rapidsai/ci-conda:latest"
221221
run_script: "ci/build_docs.sh"
222222
wheel-build-libcudf:
223223
needs: checks

.github/workflows/test.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
sha: ${{ inputs.sha }}
4242
node_type: "gpu-v100-latest-1"
4343
arch: "amd64"
44-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
44+
container_image: "rapidsai/ci-conda:latest"
4545
run_script: "ci/test_cpp_memcheck.sh"
4646
static-configure:
4747
secrets: inherit
@@ -94,7 +94,7 @@ jobs:
9494
sha: ${{ inputs.sha }}
9595
node_type: "gpu-v100-latest-1"
9696
arch: "amd64"
97-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
97+
container_image: "rapidsai/ci-conda:latest"
9898
run_script: "ci/test_java.sh"
9999
conda-notebook-tests:
100100
secrets: inherit
@@ -106,7 +106,7 @@ jobs:
106106
sha: ${{ inputs.sha }}
107107
node_type: "gpu-v100-latest-1"
108108
arch: "amd64"
109-
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
109+
container_image: "rapidsai/ci-conda:latest"
110110
run_script: "ci/test_notebooks.sh"
111111
wheel-tests-cudf:
112112
secrets: inherit

ci/test_python_other.sh

+3-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
33

44
# Support invoking test_python_other.sh outside the script directory
55
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
@@ -24,8 +24,8 @@ EXITCODE=0
2424
trap "EXITCODE=1" ERR
2525
set +e
2626

27-
rapids-logger "pytest dask_cudf (dask-expr)"
28-
DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
27+
rapids-logger "pytest dask_cudf"
28+
./ci/run_dask_cudf_pytests.sh \
2929
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
3030
--numprocesses=8 \
3131
--dist=worksteal \
@@ -34,13 +34,6 @@ DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
3434
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
3535
--cov-report=term
3636

37-
rapids-logger "pytest dask_cudf (legacy)"
38-
DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
39-
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
40-
--numprocesses=8 \
41-
--dist=worksteal \
42-
.
43-
4437
rapids-logger "pytest cudf_kafka"
4538
./ci/run_cudf_kafka_pytests.sh \
4639
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-kafka.xml"

ci/test_wheel_dask_cudf.sh

+3-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
33

44
set -eou pipefail
55

@@ -30,21 +30,11 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
3030
mkdir -p "${RAPIDS_TESTS_DIR}"
3131

3232
# Run tests in dask_cudf/tests and dask_cudf/io/tests
33-
rapids-logger "pytest dask_cudf (dask-expr)"
33+
rapids-logger "pytest dask_cudf"
3434
pushd python/dask_cudf/dask_cudf
35-
DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
35+
python -m pytest \
3636
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
3737
--numprocesses=8 \
3838
--dist=worksteal \
3939
.
4040
popd
41-
42-
# Run tests in dask_cudf/tests and dask_cudf/io/tests (legacy)
43-
rapids-logger "pytest dask_cudf (legacy)"
44-
pushd python/dask_cudf/dask_cudf
45-
DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \
46-
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
47-
--numprocesses=8 \
48-
--dist=worksteal \
49-
.
50-
popd

cpp/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ add_library(
461461
src/hash/sha256_hash.cu
462462
src/hash/sha384_hash.cu
463463
src/hash/sha512_hash.cu
464+
src/hash/xxhash_32.cu
464465
src/hash/xxhash_64.cu
465466
src/interop/dlpack.cpp
466467
src/interop/arrow_utilities.cpp

cpp/benchmarks/CMakeLists.txt

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# =============================================================================
2-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
55
# in compliance with the License. You may obtain a copy of the License at
@@ -425,6 +425,11 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp)
425425
# ---------------------------------------------------------------------------------
426426
ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp)
427427

428+
# ##################################################################################################
429+
# * rolling benchmark
430+
# ---------------------------------------------------------------------------------
431+
ConfigureNVBench(ROLLING_NVBENCH rolling/grouped_rolling_sum.cpp rolling/rolling_sum.cpp)
432+
428433
add_custom_target(
429434
run_benchmarks
430435
DEPENDS CUDF_BENCHMARKS
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <benchmarks/common/generate_input.hpp>
18+
#include <benchmarks/fixture/benchmark_fixture.hpp>
19+
20+
#include <cudf/aggregation.hpp>
21+
#include <cudf/rolling.hpp>
22+
#include <cudf/sorting.hpp>
23+
#include <cudf/utilities/default_stream.hpp>
24+
25+
#include <nvbench/nvbench.cuh>
26+
27+
// Benchmarks cudf::grouped_rolling_window with a SUM rolling aggregation.
//
// The value column has element type `Type` and is produced by create_random_column; the grouping
// keys are a random int32 column that is sorted before use.
//
// Axes read from `state`: "num_rows", "cardinality", "preceding_size", "following_size",
// "min_periods". Reports an element count labelled "Mrows/s" and a buffer size labelled
// "peak_memory_usage".
template <typename Type>
28+
void bench_row_grouped_rolling_sum(nvbench::state& state, nvbench::type_list<Type>)
29+
{
30+
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
31+
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
32+
auto const preceding_size = static_cast<cudf::size_type>(state.get_int64("preceding_size"));
33+
auto const following_size = static_cast<cudf::size_type>(state.get_int64("following_size"));
34+
auto const min_periods = static_cast<cudf::size_type>(state.get_int64("min_periods"));
35+
36+
// Grouping keys: random int32 values (UNIFORM, bounds 0 and num_rows) built with the requested
// cardinality, then sorted. The lambda returns the result of cudf::sort on a one-column table.
auto const keys = [&] {
37+
data_profile const profile =
38+
data_profile_builder()
39+
.cardinality(cardinality)
40+
.no_validity()
41+
.distribution(cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, num_rows);
42+
auto keys = create_random_column(cudf::type_to_id<int32_t>(), row_count{num_rows}, profile);
43+
return cudf::sort(cudf::table_view{{keys->view()}});
44+
}();
45+
// Values to aggregate: UNIFORM distribution with bounds 0 and 100, built with no_validity().
// NOTE(review): cardinality(0) presumably disables the cardinality limit — confirm.
data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
46+
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
47+
auto vals = create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
48+
49+
auto req = cudf::make_sum_aggregation<cudf::rolling_aggregation>();
50+
51+
// Created after the inputs are built and before the measured call; its peak_memory_usage()
// is what gets reported below.
auto const mem_stats_logger = cudf::memory_stats_logger();
52+
// Hand nvbench cudf's default stream.
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
53+
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
54+
// The result is unused; only the call itself is of interest.
auto const result = cudf::grouped_rolling_window(
55+
keys->view(), vals->view(), preceding_size, following_size, min_periods, *req);
56+
});
57+
// Throughput is derived from nvbench's "nv/cold/time/gpu/mean" summary value.
auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
58+
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
59+
state.add_buffer_size(
60+
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
61+
}
62+
63+
// Registers bench_row_grouped_rolling_sum under the name "row_grouped_rolling_sum" for value
// types std::int32_t and double. "num_rows" is a power-of-two axis (entries 14 and 28); the
// remaining axes are plain int64 axes with the literal values listed.
// NOTE(review): some "cardinality" entries exceed the smaller "num_rows" setting — confirm that
// combination is intended.
NVBENCH_BENCH_TYPES(bench_row_grouped_rolling_sum,
64+
NVBENCH_TYPE_AXES(nvbench::type_list<std::int32_t, double>))
65+
.set_name("row_grouped_rolling_sum")
66+
.add_int64_power_of_two_axis("num_rows", {14, 28})
67+
.add_int64_axis("preceding_size", {1, 10})
68+
.add_int64_axis("following_size", {2})
69+
.add_int64_axis("min_periods", {1})
70+
.add_int64_axis("cardinality", {10, 100, 1'000'000, 100'000'000});
+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <benchmarks/common/generate_input.hpp>
18+
#include <benchmarks/fixture/benchmark_fixture.hpp>
19+
20+
#include <cudf/aggregation.hpp>
21+
#include <cudf/rolling.hpp>
22+
#include <cudf/sorting.hpp>
23+
#include <cudf/types.hpp>
24+
#include <cudf/utilities/default_stream.hpp>
25+
26+
#include <rmm/device_buffer.hpp>
27+
#include <rmm/device_uvector.hpp>
28+
#include <rmm/exec_policy.hpp>
29+
30+
#include <thrust/iterator/counting_iterator.h>
31+
32+
#include <nvbench/nvbench.cuh>
33+
34+
#include <algorithm>
35+
36+
// Benchmarks cudf::rolling_window with a SUM rolling aggregation and scalar (fixed)
// preceding/following window sizes.
//
// The value column has element type `Type` and is produced by create_random_column.
//
// Axes read from `state`: "num_rows", "preceding_size", "following_size", "min_periods".
// Reports an element count labelled "Mrows/s" and a buffer size labelled "peak_memory_usage".
template <typename Type>
37+
void bench_row_fixed_rolling_sum(nvbench::state& state, nvbench::type_list<Type>)
38+
{
39+
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
40+
auto const preceding_size = static_cast<cudf::size_type>(state.get_int64("preceding_size"));
41+
auto const following_size = static_cast<cudf::size_type>(state.get_int64("following_size"));
42+
auto const min_periods = static_cast<cudf::size_type>(state.get_int64("min_periods"));
43+
44+
// Values to aggregate: UNIFORM distribution with bounds 0 and 100, built with no_validity().
// NOTE(review): cardinality(0) presumably disables the cardinality limit — confirm.
data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
45+
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
46+
auto vals = create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
47+
48+
auto req = cudf::make_sum_aggregation<cudf::rolling_aggregation>();
49+
50+
// Created after the input is built and before the measured call; its peak_memory_usage()
// is what gets reported below.
auto const mem_stats_logger = cudf::memory_stats_logger();
51+
// Hand nvbench cudf's default stream.
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
52+
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
53+
// The result is unused; only the call itself is of interest.
auto const result =
54+
cudf::rolling_window(vals->view(), preceding_size, following_size, min_periods, *req);
55+
});
56+
// Throughput is derived from nvbench's "nv/cold/time/gpu/mean" summary value.
auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
57+
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
58+
state.add_buffer_size(
59+
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
60+
}
61+
62+
// Benchmarks the cudf::rolling_window overload that takes per-row preceding/following window
// columns, with a SUM rolling aggregation.
//
// The value column has element type `Type` and is produced by create_random_column. The window
// columns are computed on the host from the "preceding_size"/"following_size" axes, clamped
// per row, and copied into device buffers.
//
// Axes read from `state`: "num_rows", "preceding_size", "following_size".
// Reports an element count labelled "Mrows/s" and a buffer size labelled "peak_memory_usage".
template <typename Type>
63+
void bench_row_variable_rolling_sum(nvbench::state& state, nvbench::type_list<Type>)
64+
{
65+
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
66+
auto const preceding_size = static_cast<cudf::size_type>(state.get_int64("preceding_size"));
67+
auto const following_size = static_cast<cudf::size_type>(state.get_int64("following_size"));
68+
69+
// Values to aggregate: UNIFORM distribution with bounds 0 and 100, built with no_validity().
auto vals = [&]() {
70+
data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
71+
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
72+
return create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
73+
}();
74+
75+
// Per-row preceding window sizes. For row i the value is preceding_size clamped to the
// range [i + 1 - num_rows, i + 1].
auto preceding = [&]() {
76+
auto data = std::vector<cudf::size_type>(num_rows);
77+
auto it = thrust::make_counting_iterator<cudf::size_type>(0);
78+
std::transform(it, it + num_rows, data.begin(), [num_rows, preceding_size](auto i) {
79+
return std::min(i + 1, std::max(preceding_size, i + 1 - num_rows));
80+
});
81+
auto buf = rmm::device_buffer(
82+
data.data(), num_rows * sizeof(cudf::size_type), cudf::get_default_stream());
83+
// Presumably ensures the copy out of `data` has finished before the host vector is
// destroyed at the end of this lambda — TODO confirm against rmm::device_buffer semantics.
cudf::get_default_stream().synchronize();
84+
// NOTE(review): the trailing empty device_buffer and 0 are presumably the null mask and
// null count — confirm against the cudf::column constructor.
return std::make_unique<cudf::column>(cudf::data_type(cudf::type_to_id<cudf::size_type>()),
85+
num_rows,
86+
std::move(buf),
87+
rmm::device_buffer{},
88+
0);
89+
}();
90+
91+
// Per-row following window sizes. For row i the value is following_size clamped to the
// range [-i - 1, num_rows - i - 1].
auto following = [&]() {
92+
auto data = std::vector<cudf::size_type>(num_rows);
93+
auto it = thrust::make_counting_iterator<cudf::size_type>(0);
94+
std::transform(it, it + num_rows, data.begin(), [num_rows, following_size](auto i) {
95+
return std::max(-i - 1, std::min(following_size, num_rows - i - 1));
96+
});
97+
auto buf = rmm::device_buffer(
98+
data.data(), num_rows * sizeof(cudf::size_type), cudf::get_default_stream());
99+
// Presumably ensures the copy out of `data` has finished before the host vector is
// destroyed at the end of this lambda — TODO confirm against rmm::device_buffer semantics.
cudf::get_default_stream().synchronize();
100+
return std::make_unique<cudf::column>(cudf::data_type(cudf::type_to_id<cudf::size_type>()),
101+
num_rows,
102+
std::move(buf),
103+
rmm::device_buffer{},
104+
0);
105+
}();
106+
107+
auto req = cudf::make_sum_aggregation<cudf::rolling_aggregation>();
108+
109+
// Created after the inputs are built and before the measured call; its peak_memory_usage()
// is what gets reported below.
auto const mem_stats_logger = cudf::memory_stats_logger();
110+
// Hand nvbench cudf's default stream.
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
111+
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
112+
// The literal 1 is presumably min_periods; this benchmark reads no "min_periods" axis.
auto const result =
113+
cudf::rolling_window(vals->view(), preceding->view(), following->view(), 1, *req);
114+
});
115+
// Throughput is derived from nvbench's "nv/cold/time/gpu/mean" summary value.
auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
116+
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
117+
state.add_buffer_size(
118+
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
119+
}
120+
121+
// Registers bench_row_fixed_rolling_sum under the name "row_fixed_rolling_sum" for value types
// std::int32_t and double. "num_rows" is a power-of-two axis (entries 14, 22 and 28); the
// remaining axes are plain int64 axes with the literal values listed.
NVBENCH_BENCH_TYPES(bench_row_fixed_rolling_sum,
122+
NVBENCH_TYPE_AXES(nvbench::type_list<std::int32_t, double>))
123+
.set_name("row_fixed_rolling_sum")
124+
.add_int64_power_of_two_axis("num_rows", {14, 22, 28})
125+
.add_int64_axis("preceding_size", {1, 10, 100})
126+
.add_int64_axis("following_size", {2})
127+
.add_int64_axis("min_periods", {1, 20});
128+
129+
// Registers bench_row_variable_rolling_sum under the name "row_variable_rolling_sum" for value
// types std::int32_t and double. "num_rows" is a power-of-two axis (entries 14, 22 and 28);
// "preceding_size" and "following_size" are plain int64 axes. No "min_periods" axis is declared.
NVBENCH_BENCH_TYPES(bench_row_variable_rolling_sum,
130+
NVBENCH_TYPE_AXES(nvbench::type_list<std::int32_t, double>))
131+
.set_name("row_variable_rolling_sum")
132+
.add_int64_power_of_two_axis("num_rows", {14, 22, 28})
133+
.add_int64_axis("preceding_size", {10, 100})
134+
.add_int64_axis("following_size", {2});

cpp/include/cudf/hashing.hpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -166,6 +166,26 @@ std::unique_ptr<column> sha512(
166166
rmm::cuda_stream_view stream = cudf::get_default_stream(),
167167
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
168168

169+
/**
170+
* @brief Computes the XXHash_32 hash value of each row in the given table
171+
*
172+
* This function computes the hash of each column using the `seed` for the first column
173+
* and the resulting hash as a seed for the next column and so on.
174+
* The result is a uint32 value for each row.
175+
*
176+
* @param input The table of columns to hash
177+
* @param seed Optional seed value to use for the hash function
178+
* @param stream CUDA stream used for device memory operations and kernel launches
179+
* @param mr Device memory resource used to allocate the returned column's device memory
180+
*
181+
* @returns A column where each row is the hash of a row from the input
182+
*/
183+
std::unique_ptr<column> xxhash_32(
184+
table_view const& input,
185+
uint32_t seed = DEFAULT_HASH_SEED,
186+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
187+
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
188+
169189
/**
170190
* @brief Computes the XXHash_64 hash value of each row in the given table
171191
*

0 commit comments

Comments
 (0)