Skip to content

Commit 6950c4b

Browse files
aastha25wmoustafa
andauthored
Integrate Coral Type System for Hive Tables (#563)
* Add Coral type system * Remove Coral prefix from type names * enhance coraltype system * fix build * rename pvt methods * prioritize original conversion over coral type system --------- Co-authored-by: Walaa Eldin Moustafa <[email protected]>
1 parent 3058e79 commit 6950c4b

File tree

7 files changed

+166
-19
lines changed

7 files changed

+166
-19
lines changed

coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2017-2025 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -39,6 +39,11 @@
3939
import org.slf4j.Logger;
4040
import org.slf4j.LoggerFactory;
4141

42+
import com.linkedin.coral.common.types.CoralDataType;
43+
import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter;
44+
import com.linkedin.coral.common.types.StructField;
45+
import com.linkedin.coral.common.types.StructType;
46+
4247

4348
/**
4449
* Adaptor class from Hive {@link org.apache.hadoop.hive.metastore.api.Table} representation to
@@ -134,12 +139,82 @@ private void checkDaliTable() {
134139
// Preconditions.checkState(isDaliTable());
135140
}
136141

142+
/**
143+
* Returns the row type (schema) for this table.
144+
*
145+
* Current behavior (validation/shadow mode):
146+
* - Always returns the legacy Hive → Calcite direct conversion
147+
* - Validates against the new Hive → Coral → Calcite two-stage conversion
148+
* - Logs warnings if conversions don't match or if validation fails
149+
*
150+
* This allows safe validation of the new conversion path in production
151+
* before switching to use it as the primary path.
152+
*
153+
* @param typeFactory Calcite type factory
154+
* @return RelDataType representing the table schema
155+
*/
137156
@Override
138157
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
158+
// Always compute and return the legacy Hive direct conversion (production path)
159+
RelDataType hiveType = getRowTypeFromHiveType(typeFactory);
160+
161+
// Validate against new two-stage Coral conversion (shadow/validation mode)
162+
try {
163+
RelDataType coralType = getRowTypeFromCoralType(typeFactory);
164+
165+
// Compare the two type representations
166+
if (!hiveType.equals(coralType)) {
167+
LOG.warn("Hive and Coral type conversion mismatch for table {}.{}. Hive: {}, Coral: {}", hiveTable.getDbName(),
168+
hiveTable.getTableName(), hiveType, coralType);
169+
}
170+
} catch (Exception e) {
171+
// Log validation failure but continue with Hive type (zero production impact)
172+
LOG.warn("Coral type validation failed for table {}.{}. Proceeding with Hive type. Error: {}",
173+
hiveTable.getDbName(), hiveTable.getTableName(), e.getMessage(), e);
174+
}
175+
176+
// Always return the battle-tested Hive conversion result
177+
return hiveType;
178+
}
179+
180+
/**
181+
* Two-stage conversion: Hive → Coral → Calcite.
182+
* This is the preferred path when using CoralCatalog.
183+
*/
184+
private RelDataType getRowTypeFromCoralType(RelDataTypeFactory typeFactory) {
185+
// Stage 1: Hive → Coral
186+
CoralDataType coralSchema = getCoralSchema();
187+
188+
// Stage 2: Coral → Calcite
189+
if (!(coralSchema instanceof StructType)) {
190+
throw new IllegalStateException("Expected StructType from getCoralSchema(), got: " + coralSchema.getClass());
191+
}
192+
193+
StructType structType = (StructType) coralSchema;
194+
List<StructField> fields = structType.getFields();
195+
196+
List<RelDataType> fieldTypes = new ArrayList<>(fields.size());
197+
List<String> fieldNames = new ArrayList<>(fields.size());
198+
199+
for (StructField field : fields) {
200+
fieldNames.add(field.getName());
201+
RelDataType fieldType = CoralTypeToRelDataTypeConverter.convert(field.getType(), typeFactory);
202+
fieldTypes.add(fieldType);
203+
}
204+
205+
return typeFactory.createStructType(fieldTypes, fieldNames);
206+
}
207+
208+
/**
209+
* Direct conversion: Hive → Calcite.
210+
* This is the legacy path for backward compatibility.
211+
*/
212+
private RelDataType getRowTypeFromHiveType(RelDataTypeFactory typeFactory) {
139213
final List<FieldSchema> cols = getColumns();
140214
final List<RelDataType> fieldTypes = new ArrayList<>(cols.size());
141215
final List<String> fieldNames = new ArrayList<>(cols.size());
142216
final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());
217+
143218
allCols.forEach(col -> {
144219
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
145220
final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory);
@@ -153,6 +228,40 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
153228
return typeFactory.createStructType(fieldTypes, fieldNames);
154229
}
155230

231+
/**
232+
* Returns the table schema in Coral type system.
233+
* This includes both regular columns (from StorageDescriptor) and partition columns.
234+
* Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter.
235+
*
236+
* @return StructType representing the full table schema (columns + partitions)
237+
*/
238+
public CoralDataType getCoralSchema() {
239+
final List<FieldSchema> cols = getColumns();
240+
final List<StructField> fields = new ArrayList<>();
241+
final List<String> fieldNames = new ArrayList<>();
242+
243+
// Combine regular columns and partition keys (same as HiveTable.getRowType)
244+
final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());
245+
246+
for (FieldSchema col : allCols) {
247+
final String colName = col.getName();
248+
249+
// Skip duplicate columns (partition keys might overlap with regular columns)
250+
if (!fieldNames.contains(colName)) {
251+
// Convert Hive type string to TypeInfo, then to CoralDataType
252+
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
253+
final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo);
254+
255+
fields.add(StructField.of(colName, coralType));
256+
fieldNames.add(colName);
257+
}
258+
}
259+
260+
// Return struct type representing the table schema
261+
// Table-level struct is nullable (Hive convention)
262+
return StructType.of(fields, true);
263+
}
264+
156265
private List<FieldSchema> getColumns() {
157266
StorageDescriptor sd = hiveTable.getSd();
158267
String serDeLib = getSerializationLib();

coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,9 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
7272
case DATE:
7373
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
7474
case TIMESTAMP:
75-
// Default to microsecond precision (6)
76-
return TimestampType.of(3, nullable);
75+
// Hive TIMESTAMP has no explicit precision (matches TypeConverter behavior)
76+
// Use PRECISION_NOT_SPECIFIED (-1) to match Calcite's behavior
77+
return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable);
7778
case BINARY:
7879
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
7980
case DECIMAL:
@@ -86,6 +87,7 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
8687
CharTypeInfo charType = (CharTypeInfo) type;
8788
return CharType.of(charType.getLength(), nullable);
8889
case VOID:
90+
return PrimitiveType.of(CoralTypeKind.NULL, true);
8991
case UNKNOWN:
9092
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
9193
default:
@@ -118,12 +120,18 @@ private static CoralDataType convertStruct(StructTypeInfo structType) {
118120
}
119121

120122
private static CoralDataType convertUnion(UnionTypeInfo unionType) {
121-
// For UNION types, we'll create a struct with all possible fields
122-
// This is similar to how some systems handle union types
123+
// For UNION types, create a struct conforming to Trino's union representation
124+
// Schema: {tag, field0, field1, ..., fieldN}
125+
// See: https://github.com/trinodb/trino/pull/3483
123126
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();
124127

125-
// Create fields for each possible type in the union
128+
// Create fields: "tag" field first (INTEGER), then "field0", "field1", etc.
126129
List<StructField> fields = new ArrayList<>();
130+
131+
// Add "tag" field (INTEGER) to indicate which union member is active
132+
fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true)));
133+
134+
// Add fields for each possible type in the union
127135
for (int i = 0; i < memberTypes.size(); i++) {
128136
CoralDataType fieldType = convert(memberTypes.get(i));
129137
fields.add(StructField.of("field" + i, fieldType));

coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ public enum CoralTypeKind {
3434
// Binary types
3535
BINARY,
3636

37+
// Special types
38+
NULL,
39+
3740
// Complex types
3841
ARRAY,
3942
MAP,

coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,13 @@ public static RelDataType convert(CoralDataType type, RelDataTypeFactory factory
3636
relType = convertPrimitive((PrimitiveType) type, factory);
3737
} else if (type instanceof TimestampType) {
3838
TimestampType ts = (TimestampType) type;
39-
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
39+
// Handle unspecified precision (Hive compatibility)
40+
if (ts.hasPrecision()) {
41+
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
42+
} else {
43+
// No precision specified - matches TypeConverter behavior
44+
relType = factory.createSqlType(SqlTypeName.TIMESTAMP);
45+
}
4046
} else if (type instanceof DecimalType) {
4147
DecimalType dec = (DecimalType) type;
4248
relType = factory.createSqlType(SqlTypeName.DECIMAL, dec.getPrecision(), dec.getScale());
@@ -107,6 +113,8 @@ private static RelDataType convertPrimitive(PrimitiveType prim, RelDataTypeFacto
107113
return factory.createSqlType(SqlTypeName.TIME);
108114
case BINARY:
109115
return factory.createSqlType(SqlTypeName.BINARY);
116+
case NULL:
117+
return factory.createSqlType(SqlTypeName.NULL);
110118
default:
111119
// Fallback for unsupported primitive types
112120
return factory.createSqlType(SqlTypeName.ANY);

coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,28 @@
1212
* Represents a TIMESTAMP type with fractional second precision in the Coral type system.
1313
*
1414
* Precision indicates the number of fractional digits of seconds, e.g.:
15+
* - -1: unspecified (PRECISION_NOT_SPECIFIED, for Hive compatibility)
1516
* - 0: seconds
1617
* - 3: milliseconds
1718
* - 6: microseconds
1819
* - 9: nanoseconds
1920
*/
2021
public final class TimestampType implements CoralDataType {
22+
/** Constant for unspecified precision (matches Calcite's RelDataType.PRECISION_NOT_SPECIFIED) */
23+
public static final int PRECISION_NOT_SPECIFIED = -1;
24+
2125
private final int precision;
2226
private final boolean nullable;
2327

2428
/**
2529
* Create a TIMESTAMP type with the given precision and nullability.
26-
* @param precision fractional second precision (0-9)
30+
* @param precision fractional second precision (-1 for unspecified, or 0-9)
2731
* @param nullable whether this type allows null values
2832
*/
2933
public static TimestampType of(int precision, boolean nullable) {
30-
if (precision < 0 || precision > 9) {
31-
throw new IllegalArgumentException("Timestamp precision must be in range [0, 9], got: " + precision);
34+
if (precision != PRECISION_NOT_SPECIFIED && (precision < 0 || precision > 9)) {
35+
throw new IllegalArgumentException(
36+
"Timestamp precision must be -1 (unspecified) or in range [0, 9], got: " + precision);
3237
}
3338
return new TimestampType(precision, nullable);
3439
}
@@ -39,12 +44,19 @@ private TimestampType(int precision, boolean nullable) {
3944
}
4045

4146
/**
42-
* @return the fractional second precision (0-9)
47+
* @return the fractional second precision (-1 for unspecified, or 0-9)
4348
*/
4449
public int getPrecision() {
4550
return precision;
4651
}
4752

53+
/**
54+
* @return true if precision is explicitly specified, false if unspecified
55+
*/
56+
public boolean hasPrecision() {
57+
return precision != PRECISION_NOT_SPECIFIED;
58+
}
59+
4860
@Override
4961
public CoralTypeKind getKind() {
5062
return CoralTypeKind.TIMESTAMP;
@@ -72,6 +84,7 @@ public int hashCode() {
7284

7385
@Override
7486
public String toString() {
75-
return "TIMESTAMP(" + precision + ")" + (nullable ? " NULL" : " NOT NULL");
87+
String precisionStr = precision == PRECISION_NOT_SPECIFIED ? "" : "(" + precision + ")";
88+
return "TIMESTAMP" + precisionStr + (nullable ? " NULL" : " NOT NULL");
7689
}
7790
}

coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ public class HiveToCoralTypeConverterTest {
2323

2424
@Test
2525
public void testPrimitiveTypes() {
26+
// Test void/null type
27+
testPrimitiveType(TypeInfoFactory.voidTypeInfo, CoralTypeKind.NULL, true, null, null);
28+
2629
// Test boolean
2730
testPrimitiveType(TypeInfoFactory.booleanTypeInfo, CoralTypeKind.BOOLEAN, true, null, null);
2831

@@ -39,7 +42,8 @@ public void testPrimitiveTypes() {
3942

4043
// Test date/time types
4144
testPrimitiveType(TypeInfoFactory.dateTypeInfo, CoralTypeKind.DATE, true, null, null);
42-
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, 3, null);
45+
// TIMESTAMP has PRECISION_NOT_SPECIFIED (-1) to match legacy TypeConverter behavior
46+
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, -1, null);
4347

4448
// Test binary
4549
testPrimitiveType(TypeInfoFactory.binaryTypeInfo, CoralTypeKind.BINARY, true, null, null);
@@ -193,13 +197,16 @@ public void testUnionType() {
193197
assertTrue(result instanceof StructType);
194198
StructType structType = (StructType) result;
195199

196-
// Union is converted to a struct with fields for each possible type
200+
// Union is converted to a struct with "tag" field first, then fields for each possible type
201+
// This matches the Trino union representation: {tag, field0, field1, ...}
197202
List<StructField> fields = structType.getFields();
198-
assertEquals(fields.size(), 2);
199-
assertEquals(fields.get(0).getName(), "field0");
203+
assertEquals(fields.size(), 3); // tag + 2 union member fields
204+
assertEquals(fields.get(0).getName(), "tag");
200205
assertEquals(fields.get(0).getType().getKind(), CoralTypeKind.INT);
201-
assertEquals(fields.get(1).getName(), "field1");
202-
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.STRING);
206+
assertEquals(fields.get(1).getName(), "field0");
207+
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.INT);
208+
assertEquals(fields.get(2).getName(), "field1");
209+
assertEquals(fields.get(2).getType().getKind(), CoralTypeKind.STRING);
203210
}
204211

205212
@Test(expectedExceptions = IllegalArgumentException.class)

coral-common/src/test/java/com/linkedin/coral/common/types/CoralTypeSystemTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ public void testTypeEquality() {
203203
PrimitiveType intType1 = PrimitiveType.of(CoralTypeKind.INT, false);
204204
PrimitiveType intType2 = PrimitiveType.of(CoralTypeKind.INT, false);
205205
PrimitiveType nullableIntType = PrimitiveType.of(CoralTypeKind.INT, true);
206-
207206
assertEquals(intType1, intType2);
208207
assertNotEquals(intType1, nullableIntType);
209208
assertEquals(intType1.hashCode(), intType2.hashCode());

0 commit comments

Comments
 (0)