Skip to content

Commit

Permalink
Correct numerous 20054-D: dynamic initialization errors found on arm+…
Browse files Browse the repository at this point in the history
…12.2 (#14108)

Compile issues found by compiling libcudf with the `rapidsai/devcontainers:23.10-cpp-gcc9-cuda12.2-ubuntu20.04` docker container.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #14108
  • Loading branch information
robertmaynard authored Sep 27, 2023
1 parent ce24796 commit a97020f
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 138 deletions.
3 changes: 2 additions & 1 deletion cpp/src/io/avro/avro_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ namespace cudf {
namespace io {
namespace avro {
struct block_desc_s {
block_desc_s() {}
block_desc_s() = default; // required to compile on ctk-12.2 + aarch64

explicit constexpr block_desc_s(
size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_)
: offset(offset_),
Expand Down
18 changes: 11 additions & 7 deletions cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct unsnap_batch_s {
* @brief Queue structure used to exchange data between warps
*/
struct unsnap_queue_s {
unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64

uint32_t prefetch_wrpos; ///< Prefetcher write position
uint32_t prefetch_rdpos; ///< Prefetch consumer read position
int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher)
Expand All @@ -64,13 +66,15 @@ struct unsnap_queue_s {
* @brief snappy decompression state
*/
struct unsnap_state_s {
uint8_t const* base; ///< base ptr of compressed stream
uint8_t const* end; ///< end of compressed stream
uint32_t uncompressed_size; ///< uncompressed stream size
uint32_t bytes_left; ///< remaining bytes to decompress
int32_t error; ///< current error status
uint32_t tstart; ///< start time for perf logging
volatile unsnap_queue_s q; ///< queue for cross-warp communication
constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64

uint8_t const* base{}; ///< base ptr of compressed stream
uint8_t const* end{}; ///< end of compressed stream
uint32_t uncompressed_size{}; ///< uncompressed stream size
uint32_t bytes_left{}; ///< remaining bytes to decompress
int32_t error{}; ///< current error status
uint32_t tstart{}; ///< start time for perf logging
volatile unsnap_queue_s q{}; ///< queue for cross-warp communication
device_span<uint8_t const> src; ///< input for current block
device_span<uint8_t> dst; ///< output for current block
};
Expand Down
39 changes: 16 additions & 23 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,24 @@ struct CompressedStreamInfo {
explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_)
: compressed_data(compressed_data_),
uncompressed_data(nullptr),
compressed_data_size(compressed_size_),
dec_in_ctl(nullptr),
dec_out_ctl(nullptr),
copy_in_ctl(nullptr),
copy_out_ctl(nullptr),
num_compressed_blocks(0),
num_uncompressed_blocks(0),
max_uncompressed_size(0),
max_uncompressed_block_size(0)
compressed_data_size(compressed_size_)
{
}
uint8_t const* compressed_data; // [in] base ptr to compressed stream data
uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl; // [in] output buffer to decompress into
device_span<compression_result> dec_res; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl; // [out] output buffer to copy to
uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed
// blocks(out)
uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream
uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data
uint8_t*
uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size{}; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl{}; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl{}; // [in] output buffer to decompress into
device_span<compression_result> dec_res{}; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl{}; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl{}; // [out] output buffer to copy to
uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of
// compressed blocks(out)
uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream
};

enum StreamIndexType {
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/orc/stats_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ __global__ void __launch_bounds__(block_size, 1)
}

struct stats_state_s {
uint8_t* base; ///< Output buffer start
uint8_t* end; ///< Output buffer end
statistics_chunk chunk;
statistics_merge_group group;
statistics_dtype stats_dtype; //!< Statistics data type for this column
uint8_t* base{}; ///< Output buffer start
uint8_t* end{}; ///< Output buffer end
statistics_chunk chunk{};
statistics_merge_group group{};
statistics_dtype stats_dtype{}; //!< Statistics data type for this column
};

/*
Expand Down
29 changes: 15 additions & 14 deletions cpp/src/io/orc/stripe_init.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ namespace orc {
namespace gpu {

struct comp_in_out {
uint8_t const* in_ptr;
size_t in_size;
uint8_t* out_ptr;
size_t out_size;
uint8_t const* in_ptr{};
size_t in_size{};
uint8_t* out_ptr{};
size_t out_size{};
};
struct compressed_stream_s {
CompressedStreamInfo info;
comp_in_out ctl;
CompressedStreamInfo info{};
comp_in_out ctl{};
};

// blockDim {128,1,1}
Expand Down Expand Up @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8)
* @brief Shared mem state for gpuParseRowGroupIndex
*/
struct rowindex_state_s {
ColumnDesc chunk;
uint32_t rowgroup_start;
uint32_t rowgroup_end;
int is_compressed;
uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2];
RowGroup rowgroups[128];
uint32_t compressed_offset[128][2];
ColumnDesc chunk{};
uint32_t rowgroup_start{};
uint32_t rowgroup_end{};
int is_compressed{};
uint32_t row_index_entry[3]
[CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2]{};
RowGroup rowgroups[128]{};
uint32_t compressed_offset[128][2]{};
};

enum row_entry_state_e {
Expand Down
67 changes: 34 additions & 33 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,48 +26,49 @@
namespace cudf::io::parquet::gpu {

struct page_state_s {
uint8_t const* data_start;
uint8_t const* data_end;
uint8_t const* lvl_end;
uint8_t const* dict_base; // ptr to dictionary page data
int32_t dict_size; // size of dictionary data
int32_t first_row; // First row in page to output
int32_t num_rows; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value; // First value in page to output
int32_t num_input_values; // total # of input/level values in the page
int32_t dtype_len; // Output data type length
int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits; // # of bits to store dictionary indices
uint32_t dict_run;
int32_t dict_val;
uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep]
int32_t error;
PageInfo page;
ColumnChunkDesc col;
constexpr page_state_s() noexcept {}
uint8_t const* data_start{};
uint8_t const* data_end{};
uint8_t const* lvl_end{};
uint8_t const* dict_base{}; // ptr to dictionary page data
int32_t dict_size{}; // size of dictionary data
int32_t first_row{}; // First row in page to output
int32_t num_rows{}; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value{}; // First value in page to output
int32_t num_input_values{}; // total # of input/level values in the page
int32_t dtype_len{}; // Output data type length
int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits{}; // # of bits to store dictionary indices
uint32_t dict_run{};
int32_t dict_val{};
uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t error{};
PageInfo page{};
ColumnChunkDesc col{};

// (leaf) value decoding
int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos; // write position of dictionary indices
int32_t src_pos; // input read position of final output value
int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos{}; // write position of dictionary indices
int32_t src_pos{}; // input read position of final output value
int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale

// repetition/definition level decoding
int32_t input_value_count; // how many values of the input we've processed
int32_t input_row_count; // how many rows of the input we've processed
int32_t input_leaf_count; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded
int32_t row_index_lower_bound; // lower bound of row indices we should process
int32_t input_value_count{}; // how many values of the input we've processed
int32_t input_row_count{}; // how many rows of the input we've processed
int32_t input_leaf_count{}; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded
int32_t row_index_lower_bound{}; // lower bound of row indices we should process

// a shared-memory cache of frequently used data when decoding. The source of this data is
// normally stored in global memory which can yield poor performance. So, when possible
// we copy that info here prior to decoding
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info];
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{};
// points to either nesting_decode_cache above when possible, or to the global source otherwise
PageNestingDecodeInfo* nesting_info;
PageNestingDecodeInfo* nesting_info{};
};

// buffers only used in the decode kernel. separated from page_state_s to keep
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/parquet/page_hdr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0,
ST_FLD_LIST};

struct byte_stream_s {
uint8_t const* cur;
uint8_t const* end;
uint8_t const* base;
uint8_t const* cur{};
uint8_t const* end{};
uint8_t const* base{};
// Parsed symbols
PageType page_type;
PageInfo page;
ColumnChunkDesc ck;
PageType page_type{};
PageInfo page{};
ColumnChunkDesc ck{};
};

/**
Expand Down
56 changes: 28 additions & 28 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ struct PageInfo {
* @brief Struct describing a particular chunk of column data
*/
struct ColumnChunkDesc {
ColumnChunkDesc() = default;
constexpr ColumnChunkDesc() noexcept {};
explicit ColumnChunkDesc(size_t compressed_size_,
uint8_t* compressed_data_,
size_t num_values_,
Expand Down Expand Up @@ -275,34 +275,34 @@ struct ColumnChunkDesc {
{
}

uint8_t const* compressed_data; // pointer to compressed column chunk data
size_t compressed_size; // total compressed data size for this chunk
size_t num_values; // total number of values in this column
size_t start_row; // starting row of this chunk
uint32_t num_rows; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level
int16_t max_nesting_depth; // max nesting depth of the output
uint16_t data_type; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t const* compressed_data{}; // pointer to compressed column chunk data
size_t compressed_size{}; // total compressed data size for this chunk
size_t num_values{}; // total number of values in this column
size_t start_row{}; // starting row of this chunk
uint32_t num_rows{}; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level
int16_t max_nesting_depth{}; // max nesting depth of the output
uint16_t data_type{}; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t
level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels
int32_t num_data_pages; // number of data pages
int32_t num_dict_pages; // number of dictionary pages
int32_t max_num_pages; // size of page_info array
PageInfo* page_info; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index; // index for string dictionary
bitmask_type** valid_map_base; // base pointers of valid bit map for this column
void** column_data_base; // base pointers of column data
void** column_string_base; // base pointers of column string data
int8_t codec; // compressed codec enum
int8_t converted_type; // converted type enum
LogicalType logical_type; // logical type
int8_t decimal_precision; // Decimal precision
int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index; // my input column index
int32_t src_col_schema; // my schema index in the file
level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels
int32_t num_data_pages{}; // number of data pages
int32_t num_dict_pages{}; // number of dictionary pages
int32_t max_num_pages{}; // size of page_info array
PageInfo* page_info{}; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index{}; // index for string dictionary
bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column
void** column_data_base{}; // base pointers of column data
void** column_string_base{}; // base pointers of column string data
int8_t codec{}; // compressed codec enum
int8_t converted_type{}; // converted type enum
LogicalType logical_type{}; // logical type
int8_t decimal_precision{}; // Decimal precision
int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index{}; // my input column index
int32_t src_col_schema{}; // my schema index in the file
};

/**
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/statistics/column_statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@ namespace io {
* @brief shared state for statistics calculation kernel
*/
struct stats_state_s {
stats_column_desc col; ///< Column information
statistics_group group; ///< Group description
statistics_chunk ck; ///< Output statistics chunk
stats_column_desc col{}; ///< Column information
statistics_group group{}; ///< Group description
statistics_chunk ck{}; ///< Output statistics chunk
};

/**
* @brief shared state for statistics merge kernel
*/
struct merge_state_s {
stats_column_desc col; ///< Column information
statistics_merge_group group; ///< Group description
statistics_chunk ck; ///< Resulting statistics chunk
stats_column_desc col{}; ///< Column information
statistics_merge_group group{}; ///< Group description
statistics_chunk ck{}; ///< Resulting statistics chunk
};

template <int dimension>
Expand Down
30 changes: 15 additions & 15 deletions cpp/src/io/statistics/statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,27 @@ union statistics_val {
};

struct statistics_chunk {
uint32_t non_nulls; //!< number of non-null values in chunk
uint32_t null_count; //!< number of null values in chunk
statistics_val min_value; //!< minimum value in chunk
statistics_val max_value; //!< maximum value in chunk
statistics_val sum; //!< sum of chunk
uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid
uint8_t has_sum; //!< Nonzero if sum is valid
uint32_t non_nulls{}; //!< number of non-null values in chunk
uint32_t null_count{}; //!< number of null values in chunk
statistics_val min_value{}; //!< minimum value in chunk
statistics_val max_value{}; //!< maximum value in chunk
statistics_val sum{}; //!< sum of chunk
uint8_t has_minmax{}; //!< Nonzero if min_value and max_values are valid
uint8_t has_sum{}; //!< Nonzero if sum is valid
};

struct statistics_group {
stats_column_desc const* col; //!< Column information
uint32_t start_row; //!< Start row of this group
uint32_t num_rows; //!< Number of rows in group
uint32_t non_leaf_nulls; //!< Number of null non-leaf values in the group
stats_column_desc const* col{}; //!< Column information
uint32_t start_row{}; //!< Start row of this group
uint32_t num_rows{}; //!< Number of rows in group
uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group
};

struct statistics_merge_group {
data_type col_dtype; //!< Column data type
statistics_dtype stats_dtype; //!< Statistics data type for this column
uint32_t start_chunk; //!< Start chunk of this group
uint32_t num_chunks; //!< Number of chunks in group
data_type col_dtype; //!< Column data type
statistics_dtype stats_dtype{dtype_none}; //!< Statistics data type for this column
uint32_t start_chunk{}; //!< Start chunk of this group
uint32_t num_chunks{}; //!< Number of chunks in group
};

template <typename T, std::enable_if_t<!std::is_same_v<T, statistics::byte_array_view>>* = nullptr>
Expand Down

0 comments on commit a97020f

Please sign in to comment.