Skip to content

Commit 54a9cd6

Browse files
committed
Merge remote-tracking branch 'upstream/branch-25.02' into prevent-pylibcudf-serialization
2 parents 2d37c08 + dc2a75c commit 54a9cd6

File tree

9 files changed

+81
-56
lines changed

9 files changed

+81
-56
lines changed

cpp/include/cudf/detail/utilities/integer_utils.hpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright 2019 BlazingDB, Inc.
33
* Copyright 2019 Eyal Rozenberg <[email protected]>
4-
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
4+
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
55
*
66
* Licensed under the Apache License, Version 2.0 (the "License");
77
* you may not use this file except in compliance with the License.
@@ -23,6 +23,8 @@
2323
*/
2424

2525
#include <cudf/fixed_point/temporary.hpp>
26+
#include <cudf/types.hpp>
27+
#include <cudf/utilities/error.hpp>
2628

2729
#include <cmath>
2830
#include <cstdlib>
@@ -44,13 +46,17 @@ namespace util {
4446
* `modulus` is positive. The safety is in regard to rollover.
4547
*/
4648
template <typename S>
47-
constexpr S round_up_safe(S number_to_round, S modulus)
49+
CUDF_HOST_DEVICE constexpr S round_up_safe(S number_to_round, S modulus)
4850
{
4951
auto remainder = number_to_round % modulus;
5052
if (remainder == 0) { return number_to_round; }
5153
auto rounded_up = number_to_round - remainder + modulus;
5254
if (rounded_up < number_to_round) {
53-
throw std::invalid_argument("Attempt to round up beyond the type's maximum value");
55+
#ifndef __CUDA_ARCH__
56+
CUDF_FAIL("Attempt to round up beyond the type's maximum value", cudf::data_type_error);
57+
#else
58+
CUDF_UNREACHABLE("Attempt to round up beyond the type's maximum value");
59+
#endif
5460
}
5561
return rounded_up;
5662
}

cpp/include/cudf/utilities/span.hpp

+24-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -197,11 +197,16 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
197197

198198
constexpr host_span() noexcept : base() {} // required to compile on centos
199199

200-
/// Constructor from pointer and size
201-
/// @param data Pointer to the first element in the span
202-
/// @param size The number of elements in the span
203-
/// @param is_device_accessible Whether the data is device accessible (e.g. pinned memory)
204-
constexpr host_span(T* data, std::size_t size, bool is_device_accessible)
200+
/**
201+
* @brief Constructor from pointer and size
202+
*
203+
* @note This needs to be host-device, as it's used by a host-device function in base_2dspan
204+
*
205+
* @param data Pointer to the first element in the span
206+
* @param size The number of elements in the span
207+
* @param is_device_accessible Whether the data is device accessible (e.g. pinned memory)
208+
*/
209+
CUDF_HOST_DEVICE constexpr host_span(T* data, std::size_t size, bool is_device_accessible)
205210
: base(data, size), _is_device_accessible{is_device_accessible}
206211
{
207212
}
@@ -311,8 +316,8 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
311316
* @param count The number of elements in the subspan
312317
* @return A subspan of the sequence, of requested count and offset
313318
*/
314-
[[nodiscard]] constexpr host_span subspan(typename base::size_type offset,
315-
typename base::size_type count) const noexcept
319+
[[nodiscard]] CUDF_HOST_DEVICE constexpr host_span subspan(
320+
typename base::size_type offset, typename base::size_type count) const noexcept
316321
{
317322
return host_span{this->data() + offset, count, _is_device_accessible};
318323
}
@@ -434,8 +439,8 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
434439
* @param count The number of elements in the subspan
435440
* @return A subspan of the sequence, of requested count and offset
436441
*/
437-
[[nodiscard]] constexpr device_span subspan(typename base::size_type offset,
438-
typename base::size_type count) const noexcept
442+
[[nodiscard]] CUDF_HOST_DEVICE constexpr device_span subspan(
443+
typename base::size_type offset, typename base::size_type count) const noexcept
439444
{
440445
return device_span{this->data() + offset, count};
441446
}
@@ -475,28 +480,28 @@ class base_2dspan {
475480
*
476481
* @return A pointer to the first element of the span
477482
*/
478-
[[nodiscard]] constexpr auto data() const noexcept { return _flat.data(); }
483+
[[nodiscard]] CUDF_HOST_DEVICE constexpr auto data() const noexcept { return _flat.data(); }
479484

480485
/**
481486
* @brief Returns the size in the span as pair.
482487
*
483488
* @return pair representing rows and columns size of the span
484489
*/
485-
[[nodiscard]] constexpr auto size() const noexcept { return _size; }
490+
[[nodiscard]] CUDF_HOST_DEVICE constexpr auto size() const noexcept { return _size; }
486491

487492
/**
488493
* @brief Returns the number of elements in the span.
489494
*
490495
* @return Number of elements in the span
491496
*/
492-
[[nodiscard]] constexpr auto count() const noexcept { return _flat.size(); }
497+
[[nodiscard]] CUDF_HOST_DEVICE constexpr auto count() const noexcept { return _flat.size(); }
493498

494499
/**
495500
* @brief Checks if the span is empty.
496501
*
497502
* @return True if the span is empty, false otherwise
498503
*/
499-
[[nodiscard]] constexpr bool is_empty() const noexcept { return count() == 0; }
504+
[[nodiscard]] CUDF_HOST_DEVICE constexpr bool is_empty() const noexcept { return count() == 0; }
500505

501506
/**
502507
* @brief Returns a reference to the row-th element of the sequence.
@@ -507,7 +512,7 @@ class base_2dspan {
507512
* @param row the index of the element to access
508513
* @return A reference to the row-th element of the sequence, i.e., `data()[row]`
509514
*/
510-
constexpr RowType<T, dynamic_extent> operator[](size_t row) const
515+
CUDF_HOST_DEVICE constexpr RowType<T, dynamic_extent> operator[](size_t row) const
511516
{
512517
return _flat.subspan(row * _size.second, _size.second);
513518
}
@@ -517,7 +522,10 @@ class base_2dspan {
517522
*
518523
* @return A flattened span of the 2D span
519524
*/
520-
[[nodiscard]] constexpr RowType<T, dynamic_extent> flat_view() const { return _flat; }
525+
[[nodiscard]] CUDF_HOST_DEVICE constexpr RowType<T, dynamic_extent> flat_view() const
526+
{
527+
return _flat;
528+
}
521529

522530
/**
523531
* @brief Construct a 2D span from another 2D span of convertible type

cpp/src/io/utilities/parsing_utils.cuh

+26-23
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -171,7 +171,10 @@ constexpr uint8_t decode_digit(char c, bool* valid_flag)
171171
}
172172

173173
// Converts character to lowercase.
174-
constexpr char to_lower(char const c) { return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; }
174+
CUDF_HOST_DEVICE constexpr char to_lower(char const c)
175+
{
176+
return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
177+
}
175178

176179
/**
177180
* @brief Checks if string is infinity, case insensitive with/without sign
@@ -515,13 +518,13 @@ struct ConvertFunctor {
515518
template <typename T,
516519
CUDF_ENABLE_IF(std::is_integral_v<T> and !std::is_same_v<T, bool> and
517520
!cudf::is_fixed_point<T>())>
518-
__host__ __device__ __forceinline__ bool operator()(char const* begin,
519-
char const* end,
520-
void* out_buffer,
521-
size_t row,
522-
data_type const output_type,
523-
parse_options_view const& opts,
524-
bool as_hex = false)
521+
__device__ __forceinline__ bool operator()(char const* begin,
522+
char const* end,
523+
void* out_buffer,
524+
size_t row,
525+
data_type const output_type,
526+
parse_options_view const& opts,
527+
bool as_hex = false)
525528
{
526529
auto const value = [as_hex, &opts, begin, end]() -> cuda::std::optional<T> {
527530
// Check for user-specified true/false values
@@ -564,13 +567,13 @@ struct ConvertFunctor {
564567
* @brief Dispatch for boolean types.
565568
*/
566569
template <typename T, CUDF_ENABLE_IF(std::is_same_v<T, bool>)>
567-
__host__ __device__ __forceinline__ bool operator()(char const* begin,
568-
char const* end,
569-
void* out_buffer,
570-
size_t row,
571-
data_type const output_type,
572-
parse_options_view const& opts,
573-
bool as_hex)
570+
__device__ __forceinline__ bool operator()(char const* begin,
571+
char const* end,
572+
void* out_buffer,
573+
size_t row,
574+
data_type const output_type,
575+
parse_options_view const& opts,
576+
bool as_hex)
574577
{
575578
auto const value = [&opts, begin, end]() -> cuda::std::optional<T> {
576579
// Check for user-specified true/false values
@@ -593,13 +596,13 @@ struct ConvertFunctor {
593596
* is not valid. In such case, the validity mask is set to zero too.
594597
*/
595598
template <typename T, CUDF_ENABLE_IF(std::is_floating_point_v<T>)>
596-
__host__ __device__ __forceinline__ bool operator()(char const* begin,
597-
char const* end,
598-
void* out_buffer,
599-
size_t row,
600-
data_type const output_type,
601-
parse_options_view const& opts,
602-
bool as_hex)
599+
__device__ __forceinline__ bool operator()(char const* begin,
600+
char const* end,
601+
void* out_buffer,
602+
size_t row,
603+
data_type const output_type,
604+
parse_options_view const& opts,
605+
bool as_hex)
603606
{
604607
auto const value = [&opts, begin, end]() -> cuda::std::optional<T> {
605608
// Check for user-specified true/false values

cpp/src/io/utilities/trie.cuh

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2018-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -74,16 +74,14 @@ CUDF_EXPORT trie create_serialized_trie(std::vector<std::string> const& keys,
7474
/*
7575
* @brief Searches for a string in a serialized trie.
7676
*
77-
* Can be executed on host or device, as long as the data is available
78-
*
7977
* @param trie Pointer to the array of nodes that make up the trie
8078
* @param key Pointer to the start of the string to find
8179
* @param key_len Length of the string to find
8280
*
8381
* @return Boolean value; true if string is found, false otherwise
8482
*/
85-
CUDF_HOST_DEVICE inline bool serialized_trie_contains(device_span<serial_trie_node const> trie,
86-
device_span<char const> key)
83+
__device__ inline bool serialized_trie_contains(device_span<serial_trie_node const> trie,
84+
device_span<char const> key)
8785
{
8886
if (trie.empty()) { return false; }
8987
if (key.empty()) { return trie.front().is_leaf; }

cpp/tests/transform/segmented_row_bit_count_test.cu

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ compute_segmented_row_bit_count(cudf::table_view const& input, cudf::size_type s
7474
// Since the number of rows may not be divisible by segment_length,
7575
// the last segment may be shorter than the others.
7676
auto const size_begin = d_sizes + segment_idx * segment_length;
77-
auto const size_end = std::min(size_begin + segment_length, d_sizes + num_rows);
77+
auto const size_end = cuda::std::min(size_begin + segment_length, d_sizes + num_rows);
7878
return thrust::reduce(thrust::seq, size_begin, size_end);
7979
}));
8080

cpp/tests/utilities/column_utilities.cu

+11-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -37,6 +37,8 @@
3737
#include <rmm/exec_policy.hpp>
3838

3939
#include <cuda/functional>
40+
#include <cuda/std/cmath>
41+
#include <cuda/std/limits>
4042
#include <thrust/copy.h>
4143
#include <thrust/distance.h>
4244
#include <thrust/equal.h>
@@ -412,14 +414,16 @@ class corresponding_rows_not_equivalent {
412414
T const y = rhs.element<T>(rhs_index);
413415

414416
// Must handle inf and nan separately
415-
if (std::isinf(x) || std::isinf(y)) {
417+
if (cuda::std::isinf(x) || cuda::std::isinf(y)) {
416418
return x != y; // comparison of (inf==inf) returns true
417-
} else if (std::isnan(x) || std::isnan(y)) {
418-
return std::isnan(x) != std::isnan(y); // comparison of (nan==nan) returns false
419+
} else if (cuda::std::isnan(x) || cuda::std::isnan(y)) {
420+
return cuda::std::isnan(x) !=
421+
cuda::std::isnan(y); // comparison of (nan==nan) returns false
419422
} else {
420-
T const abs_x_minus_y = std::abs(x - y);
421-
return abs_x_minus_y >= std::numeric_limits<T>::min() &&
422-
abs_x_minus_y > std::numeric_limits<T>::epsilon() * std::abs(x + y) * fp_ulps;
423+
T const abs_x_minus_y = cuda::std::abs(x - y);
424+
return abs_x_minus_y >= cuda::std::numeric_limits<T>::min() &&
425+
abs_x_minus_y >
426+
cuda::std::numeric_limits<T>::epsilon() * cuda::std::abs(x + y) * fp_ulps;
423427
}
424428
} else {
425429
// if either is null, then the inequality was checked already

python/cudf/cudf/tests/test_orc.py

+6
Original file line numberDiff line numberDiff line change
@@ -1975,8 +1975,14 @@ def test_row_group_alignment(datadir):
19751975
@pytest.mark.parametrize(
19761976
"inputfile",
19771977
[
1978+
# These sample data have a single column my_timestamp of the TIMESTAMP type,
1979+
# 2660 rows, and 1536 rows per row group.
19781980
"TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc",
19791981
"TestOrcFile.timestamp.desynced.snappy.RLEv2.orc",
1982+
# These two files are the same as the above, except that every 100 rows start
1983+
# with a null value.
1984+
"TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc",
1985+
"TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc",
19801986
],
19811987
)
19821988
def test_orc_reader_desynced_timestamp(datadir, inputfile):

0 commit comments

Comments
 (0)