Merge remote-tracking branch 'upstream/branch-23.10' into 14088

rapidsai · Sep 19, 2023 · ffce0f6 · ffce0f6
2 parents 6ba4b24 + c016b58
commit ffce0f6
Show file tree

Hide file tree

Showing 94 changed files with 724 additions and 449 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -63,7 +63,7 @@ repos:
                 # Explicitly specify the pyproject.toml at the repo root, not per-project.
                 args: ["--config=pyproject.toml"]
       - repo: https://github.com/pre-commit/mirrors-clang-format
-        rev: v16.0.1
+        rev: v16.0.6
         hooks:
               - id: clang-format
                 types_or: [c, c++, cuda]

diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
@@ -145,7 +145,7 @@ void BM_iterator(benchmark::State& state)
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     if (cub_or_thrust) {
       if (raw_or_iterator) {
-        raw_stream_bench_cub<T>(hasnull_F, dev_result);       // driven by raw pointer
+        raw_stream_bench_cub<T>(hasnull_F, dev_result);  // driven by raw pointer
       } else {
         iterator_bench_cub<T, false>(hasnull_F, dev_result);  // driven by riterator without nulls
       }

diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
@@ -59,8 +59,8 @@ void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns)
   int64_t const column_bytes_in    = column_bytes_out;  // we only read unmasked inputs
 
   int64_t const bytes_read =
-    (column_bytes_in + validity_bytes_in) * num_columns +   // reading columns
-    mask_size;                                              // reading boolean mask
+    (column_bytes_in + validity_bytes_in) * num_columns +  // reading columns
+    mask_size;                                             // reading boolean mask
   int64_t const bytes_written =
     (column_bytes_out + validity_bytes_out) * num_columns;  // writing columns
 

diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp
@@ -43,7 +43,7 @@ static void bench_char_types(nvbench::state& state)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   // gather some throughput statistics as well
   auto chars_size = input.chars_size();
-  state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read;
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);  // all bytes are read;
   if (api_type == "all") {
     state.add_global_memory_writes<nvbench::int8_t>(num_rows);  // output is a bool8 per row
   } else {

diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp
@@ -43,7 +43,7 @@ static void bench_extract(nvbench::state& state)
   std::uniform_int_distribution<int> words_dist(0, 999);
   std::vector<std::string> samples(100);  // 100 unique rows of data to reuse
   std::generate(samples.begin(), samples.end(), [&]() {
-    std::string row;                      // build a row of random tokens
+    std::string row;  // build a row of random tokens
     while (static_cast<cudf::size_type>(row.size()) < row_width) {
       row += std::to_string(words_dist(generator)) + " ";
     }

diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
@@ -1393,7 +1393,7 @@ struct pair_accessor {
  */
 template <typename T, bool has_nulls = false>
 struct pair_rep_accessor {
-  column_device_view const col;               ///< column view of column in device
+  column_device_view const col;  ///< column view of column in device
 
   using rep_type = device_storage_type_t<T>;  ///< representation type
 

diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
@@ -133,7 +133,7 @@ __launch_bounds__(block_size) __global__
     if (has_validity) {
       temp_valids[threadIdx.x] = false;  // init shared memory
       if (threadIdx.x < cudf::detail::warp_size) temp_valids[block_size + threadIdx.x] = false;
-      __syncthreads();                   // wait for init
+      __syncthreads();  // wait for init
     }
 
     if (mask_true) {

diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh
@@ -248,7 +248,7 @@ struct input_indexalator : base_indexalator<input_indexalator> {
   friend struct indexalator_factory;
   friend struct base_indexalator<input_indexalator>;  // for CRTP
 
-  using reference = size_type const;                  // this keeps STL and thrust happy
+  using reference = size_type const;  // this keeps STL and thrust happy
 
   input_indexalator()                                    = default;
   input_indexalator(input_indexalator const&)            = default;
@@ -332,7 +332,7 @@ struct output_indexalator : base_indexalator<output_indexalator> {
   friend struct indexalator_factory;
   friend struct base_indexalator<output_indexalator>;  // for CRTP
 
-  using reference = output_indexalator const&;         // required for output iterators
+  using reference = output_indexalator const&;  // required for output iterators
 
   output_indexalator()                                     = default;
   output_indexalator(output_indexalator const&)            = default;

diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
@@ -78,8 +78,8 @@ struct hash_join {
   cudf::null_equality const _nulls_equal;  ///< whether to consider nulls as equal
   cudf::table_view _build;                 ///< input table to build the hash map
   std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
-    _preprocessed_build;                   ///< input table preprocssed for row operators
-  map_type _hash_table;                    ///< hash table built on `_build`
+    _preprocessed_build;  ///< input table preprocssed for row operators
+  map_type _hash_table;   ///< hash table built on `_build`
 
  public:
   /**

diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp
@@ -829,5 +829,5 @@ using decimal32  = fixed_point<int32_t, Radix::BASE_10>;     ///<  32-bit decima
 using decimal64  = fixed_point<int64_t, Radix::BASE_10>;     ///<  64-bit decimal fixed point
 using decimal128 = fixed_point<__int128_t, Radix::BASE_10>;  ///< 128-bit decimal fixed point
 
-/** @} */                                                    // end of group
+/** @} */  // end of group
 }  // namespace numeric
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
@@ -386,8 +386,8 @@ class groupby {
                                                          ///< indicates null order
                                                          ///< of each column
   std::unique_ptr<detail::sort::sort_groupby_helper>
-    _helper;                                             ///< Helper object
-                                                         ///< used by sort based implementation
+    _helper;  ///< Helper object
+              ///< used by sort based implementation
 
   /**
    * @brief Get the sort helper object

diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp
@@ -213,7 +213,7 @@ class csv_reader_options {
 
     auto const max_row_bytes = 16 * 1024;  // 16KB
     auto const column_bytes  = 64;
-    auto const base_padding  = 1024;       // 1KB
+    auto const base_padding  = 1024;  // 1KB
 
     if (num_columns == 0) {
       // Use flat size if the number of columns is not known

diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
@@ -207,7 +207,7 @@ class json_reader_options {
 
     auto const max_row_bytes = 16 * 1024;  // 16KB
     auto const column_bytes  = 64;
-    auto const base_padding  = 1024;       // 1KB
+    auto const base_padding  = 1024;  // 1KB
 
     if (num_columns == 0) {
       // Use flat size if the number of columns is not known

diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp
@@ -111,10 +111,10 @@ struct string_statistics : minmax_statistics<std::string>, sum_statistics<int64_
 /**
  * @brief Statistics for boolean columns.
  *
- * The `count` array includes the count of `false` and `true` values.
+ * The `count` array contains the count of `true` values.
  */
 struct bucket_statistics {
-  std::vector<uint64_t> count;  ///< Count of `false` and `true` values
+  std::vector<uint64_t> count;  ///< count of `true` values
 };
 
 /**
@@ -141,8 +141,10 @@ using binary_statistics = sum_statistics<int64_t>;
  * the UNIX epoch. The `minimum_utc` and `maximum_utc` are the same values adjusted to UTC.
  */
 struct timestamp_statistics : minmax_statistics<int64_t> {
-  std::optional<int64_t> minimum_utc;  ///< minimum in milliseconds
-  std::optional<int64_t> maximum_utc;  ///< maximum in milliseconds
+  std::optional<int64_t> minimum_utc;    ///< minimum in milliseconds
+  std::optional<int64_t> maximum_utc;    ///< maximum in milliseconds
+  std::optional<int32_t> minimum_nanos;  ///< nanoseconds part of the minimum
+  std::optional<int32_t> maximum_nanos;  ///< nanoseconds part of the maximum
 };
 
 namespace orc {

diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,16 +50,18 @@ namespace strings {
  *
  * Any null string entries return corresponding null output column entries.
  *
- * @throw cudf::logic_error if `delimiter.is_valid()` is  `false`.
+ * @throw cudf::logic_error if `delimiter.is_valid()` is `false`.
  *
- * @param input String column.
- * @param delimiters Characters for identifying words to capitalize.
+ * @param input String column
+ * @param delimiters Characters for identifying words to capitalize
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Column of strings capitalized from the input column.
+ * @return Column of strings capitalized from the input column
  */
 std::unique_ptr<column> capitalize(
   strings_column_view const& input,
-  string_scalar const& delimiters     = string_scalar(""),
+  string_scalar const& delimiters     = string_scalar("", true, cudf::get_default_stream()),
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -83,14 +85,16 @@ std::unique_ptr<column> capitalize(
  *
  * Any null string entries return corresponding null output column entries.
  *
- * @param input String column.
- * @param sequence_type The character type that is used when identifying words.
+ * @param input String column
+ * @param sequence_type The character type that is used when identifying words
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Column of titled strings.
+ * @return Column of titled strings
  */
 std::unique_ptr<column> title(
   strings_column_view const& input,
   string_character_types sequence_type = string_character_types::ALPHA,
+  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource());
 
 /**
@@ -112,12 +116,14 @@ std::unique_ptr<column> title(
  *
  * Any null string entries result in corresponding null output column entries.
  *
- * @param input String column.
+ * @param input String column
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Column of type BOOL8.
+ * @return Column of type BOOL8
  */
 std::unique_ptr<column> is_title(
   strings_column_view const& input,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of doxygen group

diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,11 +38,13 @@ namespace strings {
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_lower(
   strings_column_view const& strings,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -55,11 +57,13 @@ std::unique_ptr<column> to_lower(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_upper(
   strings_column_view const& strings,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -73,11 +77,13 @@ std::unique_ptr<column> to_upper(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> swapcase(
   strings_column_view const& strings,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of doxygen group

diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point_to_string.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point_to_string.cuh
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/detail/convert/int_to_string.cuh>
+
+namespace cudf::strings::detail {
+
+/**
+ * @brief Returns the number of digits in the given fixed point number.
+ *
+ * @param value The value of the fixed point number
+ * @param scale The scale of the fixed point number
+ * @return int32_t The number of digits required to represent the fixed point number
+ */
+__device__ inline int32_t fixed_point_string_size(__int128_t const& value, int32_t scale)
+{
+  if (scale >= 0) return count_digits(value) + scale;
+
+  auto const abs_value = numeric::detail::abs(value);
+  auto const exp_ten   = numeric::detail::exp10<__int128_t>(-scale);
+  auto const fraction  = count_digits(abs_value % exp_ten);
+  auto const num_zeros = std::max(0, (-scale - fraction));
+  return static_cast<int32_t>(value < 0) +    // sign if negative
+         count_digits(abs_value / exp_ten) +  // integer
+         1 +                                  // decimal point
+         num_zeros +                          // zeros padding
+         fraction;                            // size of fraction
+}
+
+/**
+ * @brief Converts the given fixed point number to a string.
+ *
+ * Caller is responsible for ensuring that the output buffer is large enough. The required output
+ * buffer size can be obtained by calling `fixed_point_string_size`.
+ *
+ * @param value The value of the fixed point number
+ * @param scale The scale of the fixed point number
+ * @param out_ptr The pointer to the output string
+ */
+__device__ inline void fixed_point_to_string(__int128_t const& value, int32_t scale, char* out_ptr)
+{
+  if (scale >= 0) {
+    out_ptr += integer_to_string(value, out_ptr);
+    thrust::generate_n(thrust::seq, out_ptr, scale, []() { return '0'; });  // add zeros
+    return;
+  }
+
+  // scale < 0
+  // write format:   [-]integer.fraction
+  // where integer  = abs(value) / (10^abs(scale))
+  //       fraction = abs(value) % (10^abs(scale))
+  if (value < 0) *out_ptr++ = '-';  // add sign
+  auto const abs_value = numeric::detail::abs(value);
+  auto const exp_ten   = numeric::detail::exp10<__int128_t>(-scale);
+  auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten)));
+
+  out_ptr += integer_to_string(abs_value / exp_ten, out_ptr);  // add the integer part
+  *out_ptr++ = '.';                                            // add decimal point
+
+  thrust::generate_n(thrust::seq, out_ptr, num_zeros, []() { return '0'; });  // add zeros
+  out_ptr += num_zeros;
+
+  integer_to_string(abs_value % exp_ten, out_ptr);  // add the fraction part
+}
+
+}  // namespace cudf::strings::detail