Skip to content

Commit f853ef7

Browse files
authored
[opt](variant) improve performance for handling nullable column (#50021)
1 parent c2d0c66 commit f853ef7

File tree

3 files changed

+13
-16
lines changed

3 files changed

+13
-16
lines changed

be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ Status convert_and_write_column(vectorized::OlapBlockDataConvertor* converter,
134134
const uint8_t* nullmap = converted_column->get_nullmap();
135135
RETURN_IF_ERROR(writer->append(nullmap, converted_column->get_data(), num_rows));
136136

137-
converter->clear_source_content();
137+
converter->clear_source_content(column_id);
138138
return Status::OK();
139139
}
140140

@@ -291,8 +291,8 @@ Status VariantColumnWriterImpl::_process_root_column(vectorized::ColumnObject* p
291291
.data()
292292
: nullptr;
293293
RETURN_IF_ERROR(_root_writer->append(nullmap, column->get_data(), num_rows));
294+
converter->clear_source_content(column_id);
294295
++column_id;
295-
converter->clear_source_content();
296296

297297
_opts.meta->set_num_rows(num_rows);
298298
return Status::OK();
@@ -408,8 +408,8 @@ Status VariantColumnWriterImpl::_process_sparse_column(
408408
vectorized::ColumnObject::get_sparse_column_type());
409409
RETURN_IF_ERROR(
410410
_sparse_column_writer->append(column->get_nullmap(), column->get_data(), num_rows));
411+
converter->clear_source_content(column_id);
411412
++column_id;
412-
converter->clear_source_content();
413413

414414
// get statistics
415415
// todo: reuse the statistics already collected during the compaction stage

be/src/vec/columns/column_object.cpp

+9-12
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
260260
if (schema_util::is_conversion_required_between_integers(
261261
base_type.idx, least_common_type.get_base_type_id())) {
262262
VLOG_DEBUG << "Conversion between " << getTypeName(base_type.idx) << " and "
263-
<< getTypeName(least_common_type.get_type_id());
263+
<< getTypeName(least_common_type.get_base_type_id());
264264
DataTypePtr base_data_type;
265265
TypeIndex base_data_type_id;
266266
get_least_supertype_jsonb(
@@ -792,7 +792,8 @@ void ColumnObject::try_insert(const Field& field) {
792792
}
793793
for (auto& entry : subcolumns) {
794794
if (old_size == entry->data.size()) {
795-
bool inserted = try_insert_default_from_nested(entry);
795+
bool inserted = UNLIKELY(entry->path.has_nested_part() &&
796+
try_insert_default_from_nested(entry));
796797
if (!inserted) {
797798
entry->data.insert_default();
798799
}
@@ -838,7 +839,6 @@ bool ColumnObject::Subcolumn::is_null_at(size_t n) const {
838839
}
839840
ind -= part->size();
840841
}
841-
842842
throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range",
843843
n);
844844
}
@@ -873,7 +873,6 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res) const {
873873

874874
ind -= part->size();
875875
}
876-
877876
throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range",
878877
n);
879878
}
@@ -894,20 +893,18 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
894893
row -= num_of_defaults_in_prefix;
895894
for (size_t i = 0; i < data.size(); ++i) {
896895
const auto& part = data[i];
897-
size_t current_column_size = part->size();
896+
const auto& nullable_col =
897+
assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*part);
898+
size_t current_column_size = nullable_col.get_null_map_data().size();
898899
if (row < current_column_size) {
899900
// no need null in sparse column
900-
if (!assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*part).is_null_at(
901-
row)) {
901+
if (!nullable_col.is_null_at(row)) {
902902
// insert key
903903
key->insert_data(path.data(), path.size());
904904

905905
// every subcolumn is always Nullable
906906
auto nullable_serde =
907907
std::static_pointer_cast<DataTypeNullableSerDe>(data_serdes[i]);
908-
auto& nullable_col =
909-
assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*part);
910-
911908
// insert value
912909
ColumnString::Chars& chars = value->get_chars();
913910
nullable_serde->get_nested_serde()->write_one_cell_to_binary(
@@ -1343,7 +1340,6 @@ size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& ou
13431340

13441341
ind -= part->size();
13451342
}
1346-
13471343
throw doris::Exception(ErrorCode::OUT_OF_BOUND,
13481344
"Index ({}) for serializing JSON is out of range", n);
13491345
}
@@ -1906,7 +1902,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
19061902
for (size_t i = 0; i < std::min(size_t(_max_subcolumns_count), sorted_by_size.size());
19071903
++i) {
19081904
// if too many null values, then consider it as sparse column
1909-
if ((double)sorted_by_size[i].second < (double)num_rows * 0.95) {
1905+
if ((double)sorted_by_size[i].second < (double)num_rows * 0.99) {
19101906
continue;
19111907
}
19121908
selected_path.insert(sorted_by_size[i].first);
@@ -2035,6 +2031,7 @@ void ColumnObject::clear_column_data() {
20352031
(*std::move(part)).clear();
20362032
}
20372033
entry->data.num_of_defaults_in_prefix = 0;
2034+
entry->data.current_num_of_defaults = 0;
20382035
entry->data.num_rows = 0;
20392036
}
20402037
serialized_sparse_column->clear();

be/src/vec/columns/column_object.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
171171

172172
void reset_current_num_of_defaults() { current_num_of_defaults = 0; }
173173

174-
size_t cur_num_of_defaults() { return current_num_of_defaults; }
174+
size_t cur_num_of_defaults() const { return current_num_of_defaults; }
175175

176176
void insert_many_defaults(size_t length);
177177

0 commit comments

Comments
 (0)