-
Notifications
You must be signed in to change notification settings - Fork 205
Add native Apache Iceberg table support with CoralCatalog abstraction #556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 11 commits
bbb4332
fd6b6c0
f1cdbf2
5d84162
f54221f
9b072ea
27061e2
9908631
f38d897
ee4d9de
ec45fd0
5ee910c
c9762dc
980cb1b
fa8fb33
06d926a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| /** | ||
| * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. | ||
| * Copyright 2017-2025 LinkedIn Corporation. All rights reserved. | ||
| * Licensed under the BSD-2 Clause license. | ||
| * See LICENSE in the project root for license information. | ||
| */ | ||
|
|
@@ -17,46 +17,86 @@ | |
| import org.apache.calcite.rel.type.RelProtoDataType; | ||
| import org.apache.calcite.schema.*; | ||
|
|
||
| import com.linkedin.coral.common.catalog.CoralCatalog; | ||
| import com.linkedin.coral.common.catalog.CoralTable; | ||
| import com.linkedin.coral.common.catalog.HiveCoralTable; | ||
| import com.linkedin.coral.common.catalog.IcebergCoralTable; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkNotNull; | ||
| import static com.linkedin.coral.common.catalog.TableType.VIEW; | ||
|
|
||
|
|
||
| /** | ||
| * Adaptor from Hive catalog providing database and table names | ||
| * to Calcite {@link Schema} | ||
| * Adaptor from catalog providing database and table names to Calcite {@link Schema}. | ||
| * Can use either CoralCatalog for unified access or HiveMetastoreClient for Hive-specific access. | ||
| */ | ||
| public class HiveDbSchema implements Schema { | ||
|
|
||
| public static final String DEFAULT_DB = "default"; | ||
|
|
||
| private final CoralCatalog coralCatalog; | ||
| private final HiveMetastoreClient msc; | ||
| private final String dbName; | ||
|
|
||
| HiveDbSchema(@Nonnull HiveMetastoreClient msc, @Nonnull String dbName) { | ||
| checkNotNull(msc); | ||
| checkNotNull(dbName); | ||
| /** | ||
| * Constructor for HiveDbSchema. Exactly one of coralCatalog or msc must be non-null. | ||
| * | ||
| * @param coralCatalog Coral catalog for unified access (can be null if msc is provided) | ||
| * @param msc Hive metastore client for Hive-specific access (can be null if coralCatalog is provided) | ||
| * @param dbName Database name (must not be null) | ||
| */ | ||
| HiveDbSchema(CoralCatalog coralCatalog, HiveMetastoreClient msc, @Nonnull String dbName) { | ||
|
||
| this.coralCatalog = coralCatalog; | ||
| this.msc = msc; | ||
| this.dbName = dbName; | ||
| this.dbName = checkNotNull(dbName); | ||
| } | ||
|
|
||
| @Override | ||
| public Table getTable(String name) { | ||
| org.apache.hadoop.hive.metastore.api.Table table = msc.getTable(dbName, name); | ||
| if (table == null) { | ||
| if (coralCatalog != null) { | ||
| // Use CoralCatalog for unified table access | ||
| CoralTable coralTable = coralCatalog.getTable(dbName, name); | ||
| if (coralTable == null) { | ||
| return null; | ||
| } | ||
|
|
||
| // Dispatch based on CoralTable implementation type | ||
| if (coralTable instanceof IcebergCoralTable) { | ||
| return new IcebergTable((IcebergCoralTable) coralTable); | ||
| } else if (coralTable instanceof HiveCoralTable) { | ||
| HiveCoralTable hiveCoralTable = (HiveCoralTable) coralTable; | ||
| // Check if it's a view | ||
| if (hiveCoralTable.tableType() == VIEW) { | ||
| return new HiveViewTable(hiveCoralTable, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); | ||
| } else { | ||
| return new HiveTable(hiveCoralTable); | ||
| } | ||
| } | ||
| return null; | ||
| } | ||
| org.apache.hadoop.hive.metastore.TableType tableType = | ||
| Enum.valueOf(org.apache.hadoop.hive.metastore.TableType.class, table.getTableType()); | ||
| switch (tableType) { | ||
| case VIRTUAL_VIEW: | ||
| return new HiveViewTable(table, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); | ||
| default: | ||
| return new HiveTable(table); | ||
| } else { | ||
| // Use HiveMetastoreClient for Hive-specific access | ||
| org.apache.hadoop.hive.metastore.api.Table hiveTable = msc.getTable(dbName, name); | ||
| if (hiveTable == null) { | ||
| return null; | ||
| } | ||
|
|
||
| // Wrap in HiveCoralTable and dispatch | ||
| HiveCoralTable hiveCoralTable = new HiveCoralTable(hiveTable); | ||
| if (hiveCoralTable.tableType() == VIEW) { | ||
| return new HiveViewTable(hiveCoralTable, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); | ||
| } else { | ||
| return new HiveTable(hiveCoralTable); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Set<String> getTableNames() { | ||
| return ImmutableSet.copyOf(msc.getAllTables(dbName)); | ||
| if (coralCatalog != null) { | ||
| return ImmutableSet.copyOf(coralCatalog.getAllTables(dbName)); | ||
| } else { | ||
| return ImmutableSet.copyOf(msc.getAllTables(dbName)); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another reason why this needs to be a plugin. This should integrate with OSS Iceberg too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Specifically, I want to bring in the custom shaded distribution of li-iceberg-hive-metastore here. But in general, I feel OK about Coral depending on li-iceberg.