Skip to content

Commit 1f09aae

Browse files
committed
code refactoring for external table.
1 parent 49d056e commit 1f09aae

11 files changed

+150
-328
lines changed

.idea/icon.svg

Lines changed: 0 additions & 7 deletions
This file was deleted.

.idea/vcs.xml

Lines changed: 0 additions & 22 deletions
This file was deleted.

be/src/vec/exec/format/parquet/vparquet_column_reader.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -995,6 +995,8 @@ Status StructColumnReader::read_column_data(
995995

996996
if (reference_reader != nullptr) {
997997
// Read the reference column to get correct RL/DL information
998+
// TODO: Optimize by only reading RL/DL without actual data decoding
999+
9981000
// We need to find the FieldSchema for the reference column from _field_schema children
9991001
FieldSchema* ref_field_schema = nullptr;
10001002
for (auto& child : _field_schema->children) {

be/src/vec/exec/format/table/hive/hive_orc_nested_column_utils.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ void HiveOrcNestedColumnUtils::extract_nested_column_ids(
5555
// Normalization logic:
5656
// path: ["map_col", "*"] → ["map_col", "VALUES"] + ["map_col", "KEYS"]
5757
// path: ["map_col", "*", "field"] → ["map_col", "VALUES", "field"] + ["map_col", "KEYS"]
58-
// KEYS are always needed for correct RL/DL computation when accessing MAP via wildcard
5958
if (type.getKind() == orc::TypeKind::MAP) {
6059
auto wildcard_it = child_paths_by_table_col_name.find("*");
6160
if (wildcard_it != child_paths_by_table_col_name.end()) {
@@ -65,7 +64,7 @@ void HiveOrcNestedColumnUtils::extract_nested_column_ids(
6564
auto& values_paths = child_paths_by_table_col_name["VALUES"];
6665
values_paths.insert(values_paths.end(), wildcard_paths.begin(), wildcard_paths.end());
6766

68-
// Always add KEYS for wildcard access (needed for RL/DL computation)
67+
// Always add KEYS for wildcard access
6968
auto& keys_paths = child_paths_by_table_col_name["KEYS"];
7069
// Add an empty path to request full KEYS
7170
std::vector<std::string> empty_path;
@@ -97,7 +96,7 @@ void HiveOrcNestedColumnUtils::extract_nested_column_ids(
9796
}
9897

9998
// Special handling for Orc MAP structure:
100-
// When accessing only VALUES, we still need KEY structure for levels
99+
// When accessing only VALUES, we still need the KEY structure for deduplicate_keys
101100
// Check if we're at key child (i==0) and only VALUES is requested (no KEYS)
102101
if (i == 0) {
103102
bool has_keys_access = child_paths_by_table_col_name.find("KEYS") !=

be/src/vec/exec/format/table/hive/hive_parquet_nested_column_utils.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ void HiveParquetNestedColumnUtils::extract_nested_column_ids(
5656
// Normalization logic:
5757
// path: ["map_col", "*"] → ["map_col", "VALUES"] + ["map_col", "KEYS"]
5858
// path: ["map_col", "*", "field"] → ["map_col", "VALUES", "field"] + ["map_col", "KEYS"]
59-
// KEYS are always needed for correct RL/DL computation when accessing MAP via wildcard
6059
if (field_schema.data_type->get_primitive_type() == PrimitiveType::TYPE_MAP) {
6160
auto wildcard_it = child_paths_by_table_col_name.find("*");
6261
if (wildcard_it != child_paths_by_table_col_name.end()) {
@@ -66,7 +65,7 @@ void HiveParquetNestedColumnUtils::extract_nested_column_ids(
6665
auto& values_paths = child_paths_by_table_col_name["VALUES"];
6766
values_paths.insert(values_paths.end(), wildcard_paths.begin(), wildcard_paths.end());
6867

69-
// Always add KEYS for wildcard access (needed for RL/DL computation)
68+
// Always add KEYS for wildcard access
7069
auto& keys_paths = child_paths_by_table_col_name["KEYS"];
7170
// Add an empty path to request full KEYS
7271
std::vector<std::string> empty_path;

0 commit comments

Comments
 (0)