Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 #14108

Merged
3 changes: 2 additions & 1 deletion cpp/src/io/avro/avro_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ namespace cudf {
namespace io {
namespace avro {
struct block_desc_s {
block_desc_s() {}
block_desc_s() = default; // required to compile on ctk-12.2 + aarch64

explicit constexpr block_desc_s(
size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_)
: offset(offset_),
Expand Down
18 changes: 11 additions & 7 deletions cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct unsnap_batch_s {
* @brief Queue structure used to exchange data between warps
*/
struct unsnap_queue_s {
unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64

uint32_t prefetch_wrpos; ///< Prefetcher write position
uint32_t prefetch_rdpos; ///< Prefetch consumer read position
int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher)
Expand All @@ -64,13 +66,15 @@ struct unsnap_queue_s {
* @brief snappy decompression state
*/
struct unsnap_state_s {
uint8_t const* base; ///< base ptr of compressed stream
uint8_t const* end; ///< end of compressed stream
uint32_t uncompressed_size; ///< uncompressed stream size
uint32_t bytes_left; ///< remaining bytes to decompress
int32_t error; ///< current error status
uint32_t tstart; ///< start time for perf logging
volatile unsnap_queue_s q; ///< queue for cross-warp communication
constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64

uint8_t const* base{}; ///< base ptr of compressed stream
uint8_t const* end{}; ///< end of compressed stream
uint32_t uncompressed_size{}; ///< uncompressed stream size
uint32_t bytes_left{}; ///< remaining bytes to decompress
int32_t error{}; ///< current error status
uint32_t tstart{}; ///< start time for perf logging
volatile unsnap_queue_s q{}; ///< queue for cross-warp communication
device_span<uint8_t const> src; ///< input for current block
device_span<uint8_t> dst; ///< output for current block
};
Expand Down
39 changes: 16 additions & 23 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,24 @@ struct CompressedStreamInfo {
explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_)
: compressed_data(compressed_data_),
uncompressed_data(nullptr),
compressed_data_size(compressed_size_),
dec_in_ctl(nullptr),
dec_out_ctl(nullptr),
copy_in_ctl(nullptr),
copy_out_ctl(nullptr),
num_compressed_blocks(0),
num_uncompressed_blocks(0),
max_uncompressed_size(0),
max_uncompressed_block_size(0)
compressed_data_size(compressed_size_)
{
}
uint8_t const* compressed_data; // [in] base ptr to compressed stream data
uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl; // [in] output buffer to decompress into
device_span<compression_result> dec_res; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl; // [out] output buffer to copy to
uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed
// blocks(out)
uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream
uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data
uint8_t*
uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size{}; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl{}; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl{}; // [in] output buffer to decompress into
device_span<compression_result> dec_res{}; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl{}; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl{}; // [out] output buffer to copy to
uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of
// compressed blocks(out)
uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream
};

enum StreamIndexType {
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/orc/stats_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ __global__ void __launch_bounds__(block_size, 1)
}

struct stats_state_s {
uint8_t* base; ///< Output buffer start
uint8_t* end; ///< Output buffer end
statistics_chunk chunk;
statistics_merge_group group;
statistics_dtype stats_dtype; //!< Statistics data type for this column
uint8_t* base{}; ///< Output buffer start
uint8_t* end{}; ///< Output buffer end
statistics_chunk chunk{};
statistics_merge_group group{};
statistics_dtype stats_dtype{}; //!< Statistics data type for this column
};

/*
Expand Down
29 changes: 15 additions & 14 deletions cpp/src/io/orc/stripe_init.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ namespace orc {
namespace gpu {

struct comp_in_out {
uint8_t const* in_ptr;
size_t in_size;
uint8_t* out_ptr;
size_t out_size;
uint8_t const* in_ptr{};
size_t in_size{};
uint8_t* out_ptr{};
size_t out_size{};
};
struct compressed_stream_s {
CompressedStreamInfo info;
comp_in_out ctl;
CompressedStreamInfo info{};
comp_in_out ctl{};
};

// blockDim {128,1,1}
Expand Down Expand Up @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8)
* @brief Shared mem state for gpuParseRowGroupIndex
*/
struct rowindex_state_s {
ColumnDesc chunk;
uint32_t rowgroup_start;
uint32_t rowgroup_end;
int is_compressed;
uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2];
RowGroup rowgroups[128];
uint32_t compressed_offset[128][2];
ColumnDesc chunk{};
uint32_t rowgroup_start{};
uint32_t rowgroup_end{};
int is_compressed{};
uint32_t row_index_entry[3]
[CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2]{};
RowGroup rowgroups[128]{};
uint32_t compressed_offset[128][2]{};
};

enum row_entry_state_e {
Expand Down
67 changes: 34 additions & 33 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,48 +26,49 @@
namespace cudf::io::parquet::gpu {

struct page_state_s {
uint8_t const* data_start;
uint8_t const* data_end;
uint8_t const* lvl_end;
uint8_t const* dict_base; // ptr to dictionary page data
int32_t dict_size; // size of dictionary data
int32_t first_row; // First row in page to output
int32_t num_rows; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value; // First value in page to output
int32_t num_input_values; // total # of input/level values in the page
int32_t dtype_len; // Output data type length
int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits; // # of bits to store dictionary indices
uint32_t dict_run;
int32_t dict_val;
uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep]
int32_t error;
PageInfo page;
ColumnChunkDesc col;
constexpr page_state_s() noexcept {}
uint8_t const* data_start{};
uint8_t const* data_end{};
uint8_t const* lvl_end{};
uint8_t const* dict_base{}; // ptr to dictionary page data
int32_t dict_size{}; // size of dictionary data
int32_t first_row{}; // First row in page to output
int32_t num_rows{}; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value{}; // First value in page to output
int32_t num_input_values{}; // total # of input/level values in the page
int32_t dtype_len{}; // Output data type length
int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits{}; // # of bits to store dictionary indices
uint32_t dict_run{};
int32_t dict_val{};
uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t error{};
PageInfo page{};
ColumnChunkDesc col{};

// (leaf) value decoding
int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos; // write position of dictionary indices
int32_t src_pos; // input read position of final output value
int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos{}; // write position of dictionary indices
int32_t src_pos{}; // input read position of final output value
int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale

// repetition/definition level decoding
int32_t input_value_count; // how many values of the input we've processed
int32_t input_row_count; // how many rows of the input we've processed
int32_t input_leaf_count; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded
int32_t row_index_lower_bound; // lower bound of row indices we should process
int32_t input_value_count{}; // how many values of the input we've processed
int32_t input_row_count{}; // how many rows of the input we've processed
int32_t input_leaf_count{}; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded
int32_t row_index_lower_bound{}; // lower bound of row indices we should process

// a shared-memory cache of frequently used data when decoding. The source of this data is
// normally stored in global memory which can yield poor performance. So, when possible
// we copy that info here prior to decoding
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info];
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{};
// points to either nesting_decode_cache above when possible, or to the global source otherwise
PageNestingDecodeInfo* nesting_info;
PageNestingDecodeInfo* nesting_info{};
};

// buffers only used in the decode kernel. separated from page_state_s to keep
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/parquet/page_hdr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0,
ST_FLD_LIST};

struct byte_stream_s {
uint8_t const* cur;
uint8_t const* end;
uint8_t const* base;
uint8_t const* cur{};
uint8_t const* end{};
uint8_t const* base{};
// Parsed symbols
PageType page_type;
PageInfo page;
ColumnChunkDesc ck;
PageType page_type{};
PageInfo page{};
ColumnChunkDesc ck{};
};

/**
Expand Down
56 changes: 28 additions & 28 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ struct PageInfo {
* @brief Struct describing a particular chunk of column data
*/
struct ColumnChunkDesc {
ColumnChunkDesc() = default;
constexpr ColumnChunkDesc() noexcept {};
Copy link
Contributor

@karthikeyann karthikeyann Sep 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does default here not meet 14.5.3.1. Device Memory Space Specifiers specifications?
is default not considered as trivial constructor if all non-static members have trivial constructors?

I see usage of default in this same PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need the constexpr constructor for the byte_stream_s type in page_hdr.cu. I agree that on initial read the = default constructor should be sufficient, but the compiler disagrees.

explicit ColumnChunkDesc(size_t compressed_size_,
uint8_t* compressed_data_,
size_t num_values_,
Expand Down Expand Up @@ -275,34 +275,34 @@ struct ColumnChunkDesc {
{
}

uint8_t const* compressed_data; // pointer to compressed column chunk data
size_t compressed_size; // total compressed data size for this chunk
size_t num_values; // total number of values in this column
size_t start_row; // starting row of this chunk
uint32_t num_rows; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level
int16_t max_nesting_depth; // max nesting depth of the output
uint16_t data_type; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t const* compressed_data{}; // pointer to compressed column chunk data
size_t compressed_size{}; // total compressed data size for this chunk
size_t num_values{}; // total number of values in this column
size_t start_row{}; // starting row of this chunk
uint32_t num_rows{}; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level
int16_t max_nesting_depth{}; // max nesting depth of the output
uint16_t data_type{}; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t
level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels
int32_t num_data_pages; // number of data pages
int32_t num_dict_pages; // number of dictionary pages
int32_t max_num_pages; // size of page_info array
PageInfo* page_info; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index; // index for string dictionary
bitmask_type** valid_map_base; // base pointers of valid bit map for this column
void** column_data_base; // base pointers of column data
void** column_string_base; // base pointers of column string data
int8_t codec; // compressed codec enum
int8_t converted_type; // converted type enum
LogicalType logical_type; // logical type
int8_t decimal_precision; // Decimal precision
int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index; // my input column index
int32_t src_col_schema; // my schema index in the file
level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels
int32_t num_data_pages{}; // number of data pages
int32_t num_dict_pages{}; // number of dictionary pages
int32_t max_num_pages{}; // size of page_info array
PageInfo* page_info{}; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index{}; // index for string dictionary
bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column
void** column_data_base{}; // base pointers of column data
void** column_string_base{}; // base pointers of column string data
int8_t codec{}; // compressed codec enum
int8_t converted_type{}; // converted type enum
LogicalType logical_type{}; // logical type
int8_t decimal_precision{}; // Decimal precision
int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index{}; // my input column index
int32_t src_col_schema{}; // my schema index in the file
};

/**
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/statistics/column_statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@ namespace io {
* @brief shared state for statistics calculation kernel
*/
struct stats_state_s {
stats_column_desc col; ///< Column information
statistics_group group; ///< Group description
statistics_chunk ck; ///< Output statistics chunk
stats_column_desc col{}; ///< Column information
statistics_group group{}; ///< Group description
statistics_chunk ck{}; ///< Output statistics chunk
};

/**
* @brief shared state for statistics merge kernel
*/
struct merge_state_s {
stats_column_desc col; ///< Column information
statistics_merge_group group; ///< Group description
statistics_chunk ck; ///< Resulting statistics chunk
stats_column_desc col{}; ///< Column information
statistics_merge_group group{}; ///< Group description
statistics_chunk ck{}; ///< Resulting statistics chunk
};

template <int dimension>
Expand Down
30 changes: 15 additions & 15 deletions cpp/src/io/statistics/statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,27 @@ union statistics_val {
};

struct statistics_chunk {
uint32_t non_nulls; //!< number of non-null values in chunk
uint32_t null_count; //!< number of null values in chunk
statistics_val min_value; //!< minimum value in chunk
statistics_val max_value; //!< maximum value in chunk
statistics_val sum; //!< sum of chunk
uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid
uint8_t has_sum; //!< Nonzero if sum is valid
uint32_t non_nulls{}; //!< number of non-null values in chunk
uint32_t null_count{}; //!< number of null values in chunk
statistics_val min_value{}; //!< minimum value in chunk
statistics_val max_value{}; //!< maximum value in chunk
statistics_val sum{}; //!< sum of chunk
uint8_t has_minmax{}; //!< Nonzero if min_value and max_values are valid
uint8_t has_sum{}; //!< Nonzero if sum is valid
};

struct statistics_group {
stats_column_desc const* col; //!< Column information
uint32_t start_row; //!< Start row of this group
uint32_t num_rows; //!< Number of rows in group
uint32_t non_leaf_nulls; //!< Number of null non-leaf values in the group
stats_column_desc const* col{}; //!< Column information
uint32_t start_row{}; //!< Start row of this group
uint32_t num_rows{}; //!< Number of rows in group
uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group
};

struct statistics_merge_group {
data_type col_dtype; //!< Column data type
statistics_dtype stats_dtype; //!< Statistics data type for this column
uint32_t start_chunk; //!< Start chunk of this group
uint32_t num_chunks; //!< Number of chunks in group
data_type col_dtype; //!< Column data type
statistics_dtype stats_dtype{dtype_none}; //!< Statistics data type for this column
uint32_t start_chunk{}; //!< Start chunk of this group
uint32_t num_chunks{}; //!< Number of chunks in group
};

template <typename T, std::enable_if_t<!std::is_same_v<T, statistics::byte_array_view>>* = nullptr>
Expand Down