|
17 | 17 |
|
18 | 18 | #pragma once |
19 | 19 |
|
| 20 | +#include <gen_cpp/Descriptors_types.h> |
20 | 21 | #include <gen_cpp/segment_v2.pb.h> |
21 | 22 | #include <sys/types.h> |
22 | 23 |
|
@@ -69,6 +70,8 @@ class FileReader; |
69 | 70 | struct Slice; |
70 | 71 | struct StringRef; |
71 | 72 |
|
| 73 | +using TColumnAccessPaths = std::vector<TColumnAccessPath>; |
| 74 | + |
72 | 75 | namespace segment_v2 { |
73 | 76 |
|
74 | 77 | class EncodingInfo; |
@@ -271,7 +274,6 @@ class ColumnReader : public MetadataAdder<ColumnReader>, |
271 | 274 | Status _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges, |
272 | 275 | const ColumnIteratorOptions& iter_opts); |
273 | 276 |
|
274 | | -private: |
275 | 277 | int64_t _meta_length; |
276 | 278 | FieldType _meta_type; |
277 | 279 | FieldType _meta_children_column_type; |
@@ -366,8 +368,54 @@ class ColumnIterator { |
366 | 368 |
|
367 | 369 | virtual bool is_all_dict_encoding() const { return false; } |
368 | 370 |
|
| 371 | + virtual Status set_access_paths(const TColumnAccessPaths& all_access_paths, |
| 372 | + const TColumnAccessPaths& predicate_access_paths) { |
| 373 | + // Base behavior: all_access_paths is unused; only the presence of predicate paths is recorded. |
| 374 | + const bool has_predicate_paths = !predicate_access_paths.empty(); |
| 375 | + if (has_predicate_paths) { _reading_flag = ReadingFlag::READING_FOR_PREDICATE; } |
| 376 | + return Status::OK(); |
| 377 | + } |
| 378 | + |
| 379 | + void set_column_name(const std::string& column_name) { _column_name = column_name; } |
| 380 | + |
| 381 | + const std::string& column_name() const { return _column_name; } |
| 382 | + |
| 383 | + // Several access paths may refer to the same column and conflict or overlap, |
| 384 | + // so each iterator carries a reading flag. Enumerators are declared in ascending priority: |
| 385 | + // |
| 386 | + // NORMAL_READING — Default value, indicating that the column should be read. |
| 387 | + // SKIP_READING — The column should not be read. |
| 388 | + // NEED_TO_READ — The column must be read. |
| 389 | + // READING_FOR_PREDICATE — The column is required for predicate evaluation. |
| 390 | + // |
| 391 | + // set_reading_flag() only replaces the flag with a larger enumerator, so declaration order matters. |
| 392 | + // For example, suppose two paths touch column A: |
| 393 | + // - Path 1 specifies that column A needs to be read, so it is marked as NEED_TO_READ. |
| 394 | + // - Path 2 would skip column A; because NEED_TO_READ > SKIP_READING, the flag stays NEED_TO_READ. |
| 395 | + enum class ReadingFlag : int { |
| 396 | + NORMAL_READING, |
| 397 | + SKIP_READING, |
| 398 | + NEED_TO_READ, |
| 399 | + READING_FOR_PREDICATE |
| 400 | + }; |
| 401 | + void set_reading_flag(ReadingFlag flag) { |
| 402 | + // The flag is only ever raised: a request ranked at or below the current one is ignored. |
| 403 | + const bool outranks_current = static_cast<int>(flag) > static_cast<int>(_reading_flag); |
| 404 | + if (outranks_current) { _reading_flag = flag; } |
| 405 | + } |
| 406 | + |
| 407 | + ReadingFlag reading_flag() const { return _reading_flag; } |
| 408 | + |
| 409 | + virtual void set_need_to_read() { set_reading_flag(ReadingFlag::NEED_TO_READ); } |
| 410 | + |
| 411 | + virtual void remove_pruned_sub_iterators() {} // No-op by default; the Map/Struct/Array iterators override it. |
| 412 | + |
369 | 413 | protected: |
| 414 | + Result<TColumnAccessPaths> _get_sub_access_paths(const TColumnAccessPaths& access_paths); |
370 | 415 | ColumnIteratorOptions _opts; |
| 416 | + |
| 417 | + ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING}; |
| 418 | + std::string _column_name; |
371 | 419 | }; |
372 | 420 |
|
373 | 421 | // This iterator is used to read column data from file |
@@ -504,6 +552,13 @@ class MapFileColumnIterator final : public ColumnIterator { |
504 | 552 | return _offsets_iterator->get_current_ordinal(); |
505 | 553 | } |
506 | 554 |
|
| 555 | + Status set_access_paths(const TColumnAccessPaths& all_access_paths, |
| 556 | + const TColumnAccessPaths& predicate_access_paths) override; |
| 557 | + |
| 558 | + void set_need_to_read() override; |
| 559 | + |
| 560 | + void remove_pruned_sub_iterators() override; |
| 561 | + |
507 | 562 | private: |
508 | 563 | std::shared_ptr<ColumnReader> _map_reader = nullptr; |
509 | 564 | ColumnIteratorUPtr _null_iterator; |
@@ -533,6 +588,13 @@ class StructFileColumnIterator final : public ColumnIterator { |
533 | 588 | return _sub_column_iterators[0]->get_current_ordinal(); |
534 | 589 | } |
535 | 590 |
|
| 591 | + Status set_access_paths(const TColumnAccessPaths& all_access_paths, |
| 592 | + const TColumnAccessPaths& predicate_access_paths) override; |
| 593 | + |
| 594 | + void set_need_to_read() override; |
| 595 | + |
| 596 | + void remove_pruned_sub_iterators() override; |
| 597 | + |
536 | 598 | private: |
537 | 599 | std::shared_ptr<ColumnReader> _struct_reader = nullptr; |
538 | 600 | ColumnIteratorUPtr _null_iterator; |
@@ -561,6 +623,12 @@ class ArrayFileColumnIterator final : public ColumnIterator { |
561 | 623 | return _offset_iterator->get_current_ordinal(); |
562 | 624 | } |
563 | 625 |
|
| 626 | + Status set_access_paths(const TColumnAccessPaths& all_access_paths, |
| 627 | + const TColumnAccessPaths& predicate_access_paths) override; |
| 628 | + void set_need_to_read() override; |
| 629 | + |
| 630 | + void remove_pruned_sub_iterators() override; |
| 631 | + |
564 | 632 | private: |
565 | 633 | std::shared_ptr<ColumnReader> _array_reader = nullptr; |
566 | 634 | std::unique_ptr<OffsetFileColumnIterator> _offset_iterator; |
|
0 commit comments