Skip to content

Commit c2d0c66

Browse files
author
amory
authored
[ut](column_variant)update ut cases and some fix (#49776)
1 parent e1f5c67 commit c2d0c66

25 files changed

+3645
-308
lines changed

be/src/vec/columns/column_object.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,22 @@ void ColumnObject::insert_from_sparse_column_and_fill_remaing_dense_column(
12801280
}
12811281

12821282
/// Builds a new column whose rows are selected from this column in the order given by `perm`.
///
/// @param perm  Row permutation; it must hold at least as many entries as rows are produced.
/// @param limit Maximum number of rows to produce. 0 means "all rows"; larger values are
///              clamped to num_rows.
/// @return The result of apply_for_columns() with each column's own permute(perm, limit), or a
///         freshly created ColumnObject when there are no rows to produce.
/// @throws doris::Exception (INTERNAL_ERROR) when `perm` is shorter than the effective limit.
ColumnPtr ColumnObject::permute(const Permutation& perm, size_t limit) const {
    // Normalize the limit: 0 selects every row, anything else is capped at num_rows.
    limit = (limit == 0) ? num_rows : std::min(num_rows, limit);

    if (perm.size() < limit) {
        // The throw never returns, so no unreachable marker is needed after it.
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Size of permutation ({}) is less than required ({})", perm.size(),
                               limit);
    }

    // After normalization the limit can only be 0 when num_rows is 0: nothing to permute.
    if (limit == 0) {
        return ColumnObject::create(_max_subcolumns_count);
    }

    // Take the column handle by const reference; the callback only reads through it.
    return apply_for_columns(
            [&](const ColumnPtr& column) { return column->permute(perm, limit); });
}
12851301

@@ -1558,6 +1574,7 @@ bool ColumnObject::Subcolumn::is_empty_nested(size_t row) const {
15581574
DCHECK(type->equals(*ColumnObject::NESTED_TYPE));
15591575
Field field;
15601576
get(row, field);
1577+
field = get_field_from_variant_field(field);
15611578
if (field.get_type() == Field::Types::Array) {
15621579
const auto& array = field.get<Array>();
15631580
bool only_nulls_inside = true;
@@ -1943,20 +1960,13 @@ bool ColumnObject::empty() const {
19431960
return subcolumns.empty() || subcolumns.begin()->get()->path.get_path() == COLUMN_NAME_DUMMY;
19441961
}
19451962

1946-
ColumnPtr get_base_column_of_array(const ColumnPtr& column) {
1947-
if (const auto* column_array = check_and_get_column<ColumnArray>(column.get())) {
1948-
return column_array->get_data_ptr();
1949-
}
1950-
return column;
1951-
}
1952-
19531963
ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
19541964
if (!is_finalized()) {
19551965
auto finalized = clone_finalized();
19561966
auto& finalized_object = assert_cast<ColumnObject&>(*finalized);
19571967
return finalized_object.filter(filter, count);
19581968
}
1959-
if (subcolumns.empty()) {
1969+
if (num_rows == 0) {
19601970
auto res = ColumnObject::create(_max_subcolumns_count, count_bytes_in_filter(filter));
19611971
ENABLE_CHECK_CONSISTENCY(res.get());
19621972
return res;
@@ -1979,6 +1989,9 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
19791989

19801990
/// Replicates the rows of this column as described by `offsets`.
///
/// @param offsets Replication offsets, forwarded unchanged to every column's replicate().
/// @return The result of apply_for_columns() with each column's own replicate(offsets), or a
///         freshly created ColumnObject when this column has no rows.
ColumnPtr ColumnObject::replicate(const IColumn::Offsets& offsets) const {
    // Checks the row count against the number of offsets.
    // NOTE(review): presumably rejects a mismatch — confirm against the helper's definition.
    column_match_offsets_size(num_rows, offsets.size());

    // An empty column has nothing to replicate; return a new empty object instead of
    // walking the subcolumns.
    if (num_rows == 0) {
        return ColumnObject::create(_max_subcolumns_count);
    }

    // Take the column handle by const reference; the callback only reads through it.
    return apply_for_columns(
            [&](const ColumnPtr& column) { return column->replicate(offsets); });
}
19841997

@@ -2162,8 +2175,9 @@ Status ColumnObject::sanitize() const {
21622175
for (const auto& subcolumn : subcolumns) {
21632176
if (subcolumn->data.is_finalized()) {
21642177
auto column = subcolumn->data.get_least_common_type()->create_column();
2165-
std::string original = subcolumn->data.get_finalized_column().get_name();
2166-
std::string expected = column->get_name();
2178+
std::string original =
2179+
remove_nullable(subcolumn->data.get_finalized_column().get_ptr())->get_name();
2180+
std::string expected = remove_nullable(column->get_ptr())->get_name();
21672181
if (original != expected) {
21682182
return Status::InternalError("Incompatible type between {} and {}, debug_info:",
21692183
original, expected, debug_string());

be/src/vec/columns/column_object.h

Lines changed: 0 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -510,86 +510,6 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
510510
void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
511511
const uint8_t* __restrict null_data) const override;
512512

513-
Int64 get_int(size_t /*n*/) const override {
514-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int " + get_name());
515-
}
516-
517-
bool get_bool(size_t /*n*/) const override {
518-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_bool " + get_name());
519-
}
520-
521-
void insert_many_fix_len_data(const char* pos, size_t num) override {
522-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
523-
"insert_many_fix_len_data " + get_name());
524-
}
525-
526-
void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
527-
size_t data_num, uint32_t dict_num = 0) override {
528-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
529-
"insert_many_dict_data " + get_name());
530-
}
531-
532-
void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets,
533-
const size_t num) override {
534-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
535-
"insert_many_continuous_binary_data " + get_name());
536-
}
537-
538-
void insert_many_strings(const StringRef* strings, size_t num) override {
539-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
540-
"insert_many_strings " + get_name());
541-
}
542-
543-
void insert_many_strings_overflow(const StringRef* strings, size_t num,
544-
size_t max_length) override {
545-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
546-
"insert_many_strings_overflow " + get_name());
547-
}
548-
549-
void insert_many_raw_data(const char* pos, size_t num) override {
550-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
551-
"insert_many_raw_data " + get_name());
552-
}
553-
554-
size_t get_max_row_byte_size() const override {
555-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
556-
"get_max_row_byte_size " + get_name());
557-
}
558-
559-
void serialize_vec(StringRef* keys, size_t num_rows, size_t max_row_byte_size) const override {
560-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
561-
"serialize_vec" + std::string(get_name()));
562-
}
563-
564-
void serialize_vec_with_null_map(StringRef* keys, size_t num_rows,
565-
const uint8_t* null_map) const override {
566-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
567-
"serialize_vec_with_null_map " + get_name());
568-
}
569-
570-
void deserialize_vec(StringRef* keys, const size_t num_rows) override {
571-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
572-
"deserialize_vec" + std::string(get_name()));
573-
}
574-
575-
void deserialize_vec_with_null_map(StringRef* keys, const size_t num_rows,
576-
const uint8_t* null_map) override {
577-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
578-
"deserialize_vec_with_null_map " + get_name());
579-
}
580-
581-
bool structure_equals(const IColumn&) const override {
582-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "structure_equals " + get_name());
583-
}
584-
585-
StringRef get_raw_data() const override {
586-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_raw_data " + get_name());
587-
}
588-
589-
StringRef get_data_at(size_t) const override {
590-
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_data_at " + get_name());
591-
}
592-
593513
StringRef serialize_value_into_arena(size_t n, Arena& arena,
594514
char const*& begin) const override {
595515
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,

be/src/vec/core/field.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ class JsonbField {
184184
}
185185

186186
JsonbField& operator=(const JsonbField& x) {
187+
if (data) {
188+
delete[] data;
189+
}
187190
data = new char[size];
188191
if (!data) {
189192
throw Exception(Status::FatalError("new data buffer failed, size: {}", size));

be/src/vec/data_types/data_type_factory.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,9 @@ DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type,
420420
case FieldType::OLAP_FIELD_TYPE_STRING:
421421
result = std::make_shared<vectorized::DataTypeString>();
422422
break;
423+
case FieldType::OLAP_FIELD_TYPE_VARIANT:
424+
result = std::make_shared<vectorized::DataTypeObject>();
425+
break;
423426
case FieldType::OLAP_FIELD_TYPE_JSONB:
424427
result = std::make_shared<vectorized::DataTypeJsonb>();
425428
break;

be/src/vec/data_types/serde/data_type_object_serde.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
#else
3737
#include "util/jsonb_parser.h"
3838
#endif
39+
#include "vec/json/json_parser.h"
40+
#include "vec/json/parse2column.cpp"
3941

4042
namespace doris {
4143

@@ -124,6 +126,22 @@ Status DataTypeObjectSerDe::serialize_one_cell_to_json(const IColumn& column, in
124126
return Status::OK();
125127
}
126128

129+
/// Deserializes one JSON cell into a variant column.
///
/// The bytes referenced by `slice` are handed to parse_json_to_variant() together with a
/// parser obtained from parsers_pool and a default-constructed ParseConfig.
///
/// @param column  Destination column passed to parse_json_to_variant().
/// @param slice   Raw JSON text (data pointer + size).
/// @param options Not referenced by this implementation.
/// @return Status::OK() when the parse call completes.
///         NOTE(review): RETURN_IF_CATCH_EXCEPTION presumably converts a thrown exception into
///         an error Status — confirm against the macro definition.
Status DataTypeObjectSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                           const FormatOptions& options) const {
    // Factory used by the pool when it has to create a parser.
    auto make_parser = [] { return new JsonParser(); };
    auto pooled_parser = parsers_pool.get(make_parser);

    vectorized::ParseConfig parse_config;
    RETURN_IF_CATCH_EXCEPTION(parse_json_to_variant(column, slice.data, slice.size,
                                                    pooled_parser.get(), parse_config));
    return Status::OK();
}
137+
138+
/// Deserializes a batch of JSON cells into a variant column.
///
/// The whole body is supplied by the DESERIALIZE_COLUMN_FROM_JSON_VECTOR() macro, which relies
/// on the parameter names below being in scope.
/// NOTE(review): presumably the macro walks `slices`, calls deserialize_one_cell_from_json()
/// for each one and records progress in *num_deserialized — confirm against its definition.
///
/// @param column           Destination column.
/// @param slices           One slice of JSON text per cell.
/// @param num_deserialized Out-parameter made available to the macro.
/// @param options          Format options made available to the macro.
/// @return Status::OK() if the macro body does not return earlier.
Status DataTypeObjectSerDe::deserialize_column_from_json_vector(IColumn& column,
                                                                std::vector<Slice>& slices,
                                                                uint64_t* num_deserialized,
                                                                const FormatOptions& options) const {
    DESERIALIZE_COLUMN_FROM_JSON_VECTOR()
    return Status::OK();
}
144+
127145
void DataTypeObjectSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map,
128146
arrow::ArrayBuilder* array_builder, int64_t start,
129147
int64_t end, const cctz::time_zone& ctz) const {

be/src/vec/data_types/serde/data_type_object_serde.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,11 @@ class DataTypeObjectSerDe : public DataTypeSerDe {
4646
return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name());
4747
}
4848
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
49-
const FormatOptions& options) const override {
50-
return Status::NotSupported("deserialize_one_cell_from_text with type " +
51-
column.get_name());
52-
}
49+
const FormatOptions& options) const override;
50+
5351
Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices,
5452
uint64_t* num_deserialized,
55-
const FormatOptions& options) const override {
56-
return Status::NotSupported("deserialize_column_from_text_vector with type " +
57-
column.get_name());
58-
}
53+
const FormatOptions& options) const override;
5954

6055
Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start,
6156
int64_t end) const override {
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
column: variant with allocate size: 545847616
2+
545847616
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
column: variant with byte_size: 493265073
2+
493265073
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
column: variant with hashes: 2476392122 with ptr: 2225
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
column: variant with hashes: 2476392122 with ptr: 2225

0 commit comments

Comments
 (0)