Skip to content

Commit 00feb82

Browse files
authored
Limit the number of keys to calculate column sizes and page starts in PQ reader to 1B (#17059)
This PR limits the number of keys processed at a time when calculating column `sizes` and `page_start_values` to 1B, averting possible OOM and UB from implicit narrowing of `size_t` iterators to `size_type` iterators in `thrust::reduce_by_key`. Closes #16985 Closes #17086 ## Resolved - [x] Add tests - [x] Debug with fingerprinting structs table for a possible bug in PQ writer (nothing seems wrong with the writer as pyarrow is able to read the written parquet files). Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Bradley Dice (https://github.com/bdice) - Vukasin Milovanovic (https://github.com/vuule) - Yunsong Wang (https://github.com/PointKernel) URL: #17059
1 parent 920a5f6 commit 00feb82

File tree

2 files changed

+96
-28
lines changed

2 files changed

+96
-28
lines changed

cpp/src/io/parquet/reader_impl_preprocess.cu

Lines changed: 61 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include <thrust/unique.h>
4545

4646
#include <bitset>
47+
#include <limits>
4748
#include <numeric>
4849

4950
namespace cudf::io::parquet::detail {
@@ -1592,36 +1593,68 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num
15921593
auto const d_cols_info = cudf::detail::make_device_uvector_async(
15931594
h_cols_info, _stream, cudf::get_current_device_resource_ref());
15941595

1595-
auto const num_keys = _input_columns.size() * max_depth * subpass.pages.size();
1596-
// size iterator. indexes pages by sorted order
1597-
rmm::device_uvector<size_type> size_input{num_keys, _stream};
1598-
thrust::transform(
1599-
rmm::exec_policy(_stream),
1600-
thrust::make_counting_iterator<size_type>(0),
1601-
thrust::make_counting_iterator<size_type>(num_keys),
1602-
size_input.begin(),
1603-
get_page_nesting_size{
1604-
d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()});
1605-
auto const reduction_keys =
1606-
cudf::detail::make_counting_transform_iterator(0, get_reduction_key{subpass.pages.size()});
1596+
// Vector to store page sizes for each column at each depth
16071597
cudf::detail::hostdevice_vector<size_t> sizes{_input_columns.size() * max_depth, _stream};
16081598

1609-
// find the size of each column
1610-
thrust::reduce_by_key(rmm::exec_policy(_stream),
1611-
reduction_keys,
1612-
reduction_keys + num_keys,
1613-
size_input.cbegin(),
1614-
thrust::make_discard_iterator(),
1615-
sizes.d_begin());
1616-
1617-
// for nested hierarchies, compute per-page start offset
1618-
thrust::exclusive_scan_by_key(
1619-
rmm::exec_policy(_stream),
1620-
reduction_keys,
1621-
reduction_keys + num_keys,
1622-
size_input.cbegin(),
1623-
start_offset_output_iterator{
1624-
subpass.pages.device_begin(), 0, d_cols_info.data(), max_depth, subpass.pages.size()});
1599+
// Total number of keys to process
1600+
auto const num_keys = _input_columns.size() * max_depth * subpass.pages.size();
1601+
1602+
// Maximum number of keys processed per iteration (~1 billion: size_type max / 2)
1603+
auto constexpr max_keys_per_iter =
1604+
static_cast<size_t>(std::numeric_limits<size_type>::max() / 2);
1605+
1606+
// Number of keys per column
1607+
auto const num_keys_per_col = max_depth * subpass.pages.size();
1608+
1609+
// All keys if they fit in one pass; otherwise the largest multiple of `num_keys_per_col` that is <= `max_keys_per_iter`
1610+
auto const num_keys_per_iter =
1611+
num_keys <= max_keys_per_iter
1612+
? num_keys
1613+
: num_keys_per_col * std::max<size_t>(1, max_keys_per_iter / num_keys_per_col);
1614+
1615+
// Size iterator. Indexes pages by sorted order
1616+
rmm::device_uvector<size_type> size_input{num_keys_per_iter, _stream};
1617+
1618+
// To keep track of the starting key of an iteration
1619+
size_t key_start = 0;
1620+
// Loop until all keys are processed
1621+
while (key_start < num_keys) {
1622+
// Number of keys processed in this iteration
1623+
auto const num_keys_this_iter = std::min<size_t>(num_keys_per_iter, num_keys - key_start);
1624+
thrust::transform(
1625+
rmm::exec_policy_nosync(_stream),
1626+
thrust::make_counting_iterator<size_t>(key_start),
1627+
thrust::make_counting_iterator<size_t>(key_start + num_keys_this_iter),
1628+
size_input.begin(),
1629+
get_page_nesting_size{
1630+
d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()});
1631+
1632+
// Manually create a size_t `key_start`-compatible counting transform iterator to avoid
1633+
// implicit casting to size_type.
1634+
auto const reduction_keys = thrust::make_transform_iterator(
1635+
thrust::make_counting_iterator<size_t>(key_start), get_reduction_key{subpass.pages.size()});
1636+
1637+
// Find the size of each column
1638+
thrust::reduce_by_key(rmm::exec_policy_nosync(_stream),
1639+
reduction_keys,
1640+
reduction_keys + num_keys_this_iter,
1641+
size_input.cbegin(),
1642+
thrust::make_discard_iterator(),
1643+
sizes.d_begin() + (key_start / subpass.pages.size()));
1644+
1645+
// For nested hierarchies, compute per-page start offset
1646+
thrust::exclusive_scan_by_key(rmm::exec_policy_nosync(_stream),
1647+
reduction_keys,
1648+
reduction_keys + num_keys_this_iter,
1649+
size_input.cbegin(),
1650+
start_offset_output_iterator{subpass.pages.device_begin(),
1651+
key_start,
1652+
d_cols_info.data(),
1653+
max_depth,
1654+
subpass.pages.size()});
1655+
// Increment the key_start
1656+
key_start += num_keys_this_iter;
1657+
}
16251658

16261659
sizes.device_to_host_sync(_stream);
16271660
for (size_type idx = 0; idx < static_cast<size_type>(_input_columns.size()); idx++) {

cpp/tests/io/parquet_reader_test.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2724,3 +2724,38 @@ TYPED_TEST(ParquetReaderPredicatePushdownTest, FilterTyped)
27242724
EXPECT_EQ(result_table.num_columns(), expected->num_columns());
27252725
CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result_table);
27262726
}
2727+
2728+
TEST_F(ParquetReaderTest, ListsWideTable)
{
  // Regression test for the chunked size/page-start computation in the Parquet reader:
  // a wide table of list<list<int32>> columns yields slightly over 2B
  // (column, depth, page) keys, exceeding the per-iteration key limit so the
  // reader must process the keys in multiple passes (see #16985, #17086).
  auto constexpr num_rows = 2;
  auto constexpr num_cols = 26'755;  // for slightly over 2B keys
  // NOTE(review): the original declared `std::mt19937 engine{seed}` here but never
  // used it; the unused RNG and its seed constant have been removed.

  auto list_list       = make_parquet_list_list_col<int32_t>(0, num_rows, 1, 1, false);
  auto list_list_nulls = make_parquet_list_list_col<int32_t>(0, num_rows, 1, 1, true);

  // Alternate between non-nullable and nullable columns across the table
  std::vector<cudf::column_view> cols(num_cols);
  bool with_nulls = false;
  std::generate_n(cols.begin(), num_cols, [&]() {
    auto const view = with_nulls ? list_list_nulls->view() : list_list->view();
    with_nulls      = not with_nulls;
    return view;
  });

  cudf::table_view expected(cols);

  // Use a host buffer for faster I/O
  std::vector<char> buffer;
  auto const out_opts =
    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buffer}, expected).build();
  cudf::io::write_parquet(out_opts);

  // Round-trip: read the written buffer back and verify the table is unchanged
  cudf::io::parquet_reader_options default_in_opts =
    cudf::io::parquet_reader_options::builder(cudf::io::source_info(buffer.data(), buffer.size()));
  auto const [result, _] = cudf::io::read_parquet(default_in_opts);

  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result->view());
}

0 commit comments

Comments
 (0)