23
23
#include < exception>
24
24
#include < iostream>
25
25
#include < memory>
26
+ #include < optional>
26
27
#include < string>
27
28
#include < type_traits>
28
29
#include < unordered_map>
29
30
#include < utility>
30
31
#include < vector>
31
32
32
33
#include " arrow/array.h"
34
+ #include " arrow/array/array_binary.h"
33
35
#include " arrow/array/builder_binary.h"
34
36
#include " arrow/array/builder_dict.h"
35
37
#include " arrow/array/builder_primitive.h"
@@ -2040,23 +2042,29 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
2040
2042
LevelInfo leaf_info_;
2041
2043
};
2042
2044
2043
- class FLBARecordReader : public TypedRecordReader <FLBAType>,
2044
- virtual public BinaryRecordReader {
2045
+ class FLBARecordReader final : public TypedRecordReader<FLBAType>,
2046
+ virtual public BinaryRecordReader {
2045
2047
public:
2046
2048
FLBARecordReader (const ColumnDescriptor* descr, LevelInfo leaf_info,
2047
2049
::arrow::MemoryPool* pool, bool read_dense_for_nullable)
2048
2050
: TypedRecordReader<FLBAType>(descr, leaf_info, pool, read_dense_for_nullable),
2049
- builder_ (nullptr ) {
2051
+ byte_width_ (descr_->type_length ()),
2052
+ empty_(byte_width_, 0 ),
2053
+ type_(::arrow::fixed_size_binary(byte_width_)),
2054
+ null_bitmap_builder_(pool),
2055
+ data_builder_(pool) {
2050
2056
ARROW_DCHECK_EQ (descr_->physical_type (), Type::FIXED_LEN_BYTE_ARRAY);
2051
- int byte_width = descr_->type_length ();
2052
- std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary (byte_width);
2053
- builder_ = std::make_unique<::arrow::FixedSizeBinaryBuilder>(type, this ->pool_ );
2054
2057
}
2055
2058
2056
2059
// Finishes the accumulated values and returns them as a one-element vector
// holding a single FixedSizeBinaryArray.
//
// The array is built directly from the two finished buffers (value bytes and
// validity bitmap) together with the cached type, the number of slots and the
// number of nulls. A failing Finish() is surfaced through
// PARQUET_ASSIGN_OR_THROW.
::arrow::ArrayVector GetBuilderChunks() override {
  // Read the counters first, while the builders still hold the data.
  // NOTE(review): Finish() is expected to reset the builders — confirm
  // against the builder API.
  const int64_t num_slots = null_bitmap_builder_.length();
  const int64_t num_nulls = null_bitmap_builder_.false_count();

  // Every slot, valid or null, must have contributed exactly byte_width_ bytes.
  ARROW_DCHECK_EQ(num_slots * byte_width_, data_builder_.length());

  PARQUET_ASSIGN_OR_THROW(auto value_bytes, data_builder_.Finish());
  PARQUET_ASSIGN_OR_THROW(auto validity, null_bitmap_builder_.Finish());

  std::shared_ptr<::arrow::Array> chunk =
      std::make_shared<::arrow::FixedSizeBinaryArray>(type_, num_slots, value_bytes,
                                                      validity, num_nulls);
  return ::arrow::ArrayVector{std::move(chunk)};
}
2061
2069
2062
2070
void ReadValuesDense (int64_t values_to_read) override {
@@ -2065,9 +2073,9 @@ class FLBARecordReader : public TypedRecordReader<FLBAType>,
2065
2073
this ->current_decoder_ ->Decode (values, static_cast <int >(values_to_read));
2066
2074
CheckNumberDecoded (num_decoded, values_to_read);
2067
2075
2068
- for ( int64_t i = 0 ; i < num_decoded; i++) {
2069
- PARQUET_THROW_NOT_OK (builder_-> Append (values[i]. ptr ));
2070
- }
2076
+ PARQUET_THROW_NOT_OK (null_bitmap_builder_. Reserve ( num_decoded));
2077
+ PARQUET_THROW_NOT_OK (data_builder_. Reserve (num_decoded * byte_width_ ));
2078
+ UnsafeAppendDense (values, num_decoded);
2071
2079
ResetValues ();
2072
2080
}
2073
2081
@@ -2081,22 +2089,45 @@ class FLBARecordReader : public TypedRecordReader<FLBAType>,
2081
2089
valid_bits, valid_bits_offset);
2082
2090
ARROW_DCHECK_EQ (num_decoded, values_to_read);
2083
2091
2092
+ PARQUET_THROW_NOT_OK (null_bitmap_builder_.Reserve (num_decoded));
2093
+ PARQUET_THROW_NOT_OK (data_builder_.Reserve (num_decoded * byte_width_));
2094
+ if (null_count == 0 ) {
2095
+ UnsafeAppendDense (values, num_decoded);
2096
+ } else {
2097
+ UnsafeAppendSpaced (values, num_decoded, valid_bits, valid_bits_offset);
2098
+ }
2099
+ ResetValues ();
2100
+ }
2101
+
2102
+ void UnsafeAppendDense (const FLBA* values, int64_t num_decoded) {
2103
+ null_bitmap_builder_.UnsafeAppend (num_decoded, /* value=*/ true );
2104
+ for (int64_t i = 0 ; i < num_decoded; i++) {
2105
+ data_builder_.UnsafeAppend (values[i].ptr , byte_width_);
2106
+ }
2107
+ }
2108
+
2109
+ void UnsafeAppendSpaced (const FLBA* values, int64_t num_decoded,
2110
+ const uint8_t * valid_bits, int64_t valid_bits_offset) {
2111
+ null_bitmap_builder_.UnsafeAppend (valid_bits, valid_bits_offset, num_decoded);
2084
2112
for (int64_t i = 0 ; i < num_decoded; i++) {
2085
2113
if (::arrow::bit_util::GetBit (valid_bits, valid_bits_offset + i)) {
2086
- PARQUET_THROW_NOT_OK (builder_-> Append ( values[i].ptr ) );
2114
+ data_builder_. UnsafeAppend ( values[i].ptr , byte_width_ );
2087
2115
} else {
2088
- PARQUET_THROW_NOT_OK (builder_-> AppendNull () );
2116
+ data_builder_. UnsafeAppend (empty_. data (), byte_width_ );
2089
2117
}
2090
2118
}
2091
- ResetValues ();
2092
2119
}
2093
2120
2094
2121
private:
2095
- std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_;
2122
+ const int byte_width_;
2123
+ const std::vector<uint8_t > empty_;
2124
+ std::shared_ptr<::arrow::DataType> type_;
2125
+ ::arrow::TypedBufferBuilder<bool > null_bitmap_builder_;
2126
+ ::arrow::BufferBuilder data_builder_;
2096
2127
};
2097
2128
2098
- class ByteArrayChunkedRecordReader : public TypedRecordReader <ByteArrayType>,
2099
- virtual public BinaryRecordReader {
2129
+ class ByteArrayChunkedRecordReader final : public TypedRecordReader<ByteArrayType>,
2130
+ virtual public BinaryRecordReader {
2100
2131
public:
2101
2132
ByteArrayChunkedRecordReader (const ColumnDescriptor* descr, LevelInfo leaf_info,
2102
2133
::arrow::MemoryPool* pool, bool read_dense_for_nullable)
@@ -2137,8 +2168,8 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType>,
2137
2168
typename EncodingTraits<ByteArrayType>::Accumulator accumulator_;
2138
2169
};
2139
2170
2140
- class ByteArrayDictionaryRecordReader : public TypedRecordReader <ByteArrayType>,
2141
- virtual public DictionaryRecordReader {
2171
+ class ByteArrayDictionaryRecordReader final : public TypedRecordReader<ByteArrayType>,
2172
+ virtual public DictionaryRecordReader {
2142
2173
public:
2143
2174
ByteArrayDictionaryRecordReader (const ColumnDescriptor* descr, LevelInfo leaf_info,
2144
2175
::arrow::MemoryPool* pool, bool read_dense_for_nullable)
0 commit comments