diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java index 1df9fa940..e5807ffaf 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2025 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -39,6 +39,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.linkedin.coral.common.types.CoralDataType; +import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter; +import com.linkedin.coral.common.types.StructField; +import com.linkedin.coral.common.types.StructType; + /** * Adaptor class from Hive {@link org.apache.hadoop.hive.metastore.api.Table} representation to @@ -134,12 +139,82 @@ private void checkDaliTable() { // Preconditions.checkState(isDaliTable()); } + /** + * Returns the row type (schema) for this table. + * + * Current behavior (validation/shadow mode): + * - Always returns the legacy Hive → Calcite direct conversion + * - Validates against the new Hive → Coral → Calcite two-stage conversion + * - Logs warnings if conversions don't match or if validation fails + * + * This allows safe validation of the new conversion path in production + * before switching to use it as the primary path. + * + * @param typeFactory Calcite type factory + * @return RelDataType representing the table schema + */ @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { + // Always compute and return the legacy Hive direct conversion (production path) + RelDataType hiveType = getRowTypeFromHiveType(typeFactory); + + // Validate against new two-stage Coral conversion (shadow/validation mode) + try { + RelDataType coralType = getRowTypeFromCoralType(typeFactory); + + // Compare the two type representations + if (!hiveType.equals(coralType)) { + LOG.warn("Hive and Coral type conversion mismatch for table {}.{}. Hive: {}, Coral: {}", hiveTable.getDbName(), + hiveTable.getTableName(), hiveType, coralType); + } + } catch (Exception e) { + // Log validation failure but continue with Hive type (zero production impact) + LOG.warn("Coral type validation failed for table {}.{}. Proceeding with Hive type. Error: {}", + hiveTable.getDbName(), hiveTable.getTableName(), e.getMessage(), e); + } + + // Always return the battle-tested Hive conversion result + return hiveType; + } + + /** + * Two-stage conversion: Hive → Coral → Calcite. + * This is the preferred path when using CoralCatalog. + */ + private RelDataType getRowTypeFromCoralType(RelDataTypeFactory typeFactory) { + // Stage 1: Hive → Coral + CoralDataType coralSchema = getCoralSchema(); + + // Stage 2: Coral → Calcite + if (!(coralSchema instanceof StructType)) { + throw new IllegalStateException("Expected StructType from getCoralSchema(), got: " + coralSchema.getClass()); + } + + StructType structType = (StructType) coralSchema; + List fields = structType.getFields(); + + List fieldTypes = new ArrayList<>(fields.size()); + List fieldNames = new ArrayList<>(fields.size()); + + for (StructField field : fields) { + fieldNames.add(field.getName()); + RelDataType fieldType = CoralTypeToRelDataTypeConverter.convert(field.getType(), typeFactory); + fieldTypes.add(fieldType); + } + + return typeFactory.createStructType(fieldTypes, fieldNames); + } + + /** + * Direct conversion: Hive → Calcite. + * This is the legacy path for backward compatibility. + */ + private RelDataType getRowTypeFromHiveType(RelDataTypeFactory typeFactory) { final List cols = getColumns(); final List fieldTypes = new ArrayList<>(cols.size()); final List fieldNames = new ArrayList<>(cols.size()); final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); + allCols.forEach(col -> { final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory); @@ -153,6 +228,40 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) { return typeFactory.createStructType(fieldTypes, fieldNames); } + /** + * Returns the table schema in Coral type system. + * This includes both regular columns (from StorageDescriptor) and partition columns. + * Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter. + * + * @return StructType representing the full table schema (columns + partitions) + */ + public CoralDataType getCoralSchema() { + final List cols = getColumns(); + final List fields = new ArrayList<>(); + final List fieldNames = new ArrayList<>(); + + // Combine regular columns and partition keys (same as HiveTable.getRowType) + final Iterable allCols = Iterables.concat(cols, hiveTable.getPartitionKeys()); + + for (FieldSchema col : allCols) { + final String colName = col.getName(); + + // Skip duplicate columns (partition keys might overlap with regular columns) + if (!fieldNames.contains(colName)) { + // Convert Hive type string to TypeInfo, then to CoralDataType + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType()); + final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo); + + fields.add(StructField.of(colName, coralType)); + fieldNames.add(colName); + } + } + + // Return struct type representing the table schema + // Table-level struct is nullable (Hive convention) + return StructType.of(fields, true); + } + private List getColumns() { StorageDescriptor sd = hiveTable.getSd(); String serDeLib = getSerializationLib(); diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java index b832134f3..a5cd99e71 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java @@ -72,8 +72,9 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) { case DATE: return PrimitiveType.of(CoralTypeKind.DATE, nullable); case TIMESTAMP: - // Default to microsecond precision (6) - return TimestampType.of(3, nullable); + // Hive TIMESTAMP has no explicit precision (matches TypeConverter behavior) + // Use PRECISION_NOT_SPECIFIED (-1) to match Calcite's behavior + return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable); case BINARY: return PrimitiveType.of(CoralTypeKind.BINARY, nullable); case DECIMAL: @@ -86,6 +87,7 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) { CharTypeInfo charType = (CharTypeInfo) type; return CharType.of(charType.getLength(), nullable); case VOID: + return PrimitiveType.of(CoralTypeKind.NULL, true); case UNKNOWN: return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback default: @@ -118,12 +120,18 @@ private static CoralDataType convertStruct(StructTypeInfo structType) { } private static CoralDataType convertUnion(UnionTypeInfo unionType) { - // For UNION types, we'll create a struct with all possible fields - // This is similar to how some systems handle union types + // For UNION types, create a struct conforming to Trino's union representation + // Schema: {tag, field0, field1, ..., fieldN} + // See: https://github.com/trinodb/trino/pull/3483 List memberTypes = unionType.getAllUnionObjectTypeInfos(); - // Create fields for each possible type in the union + // Create fields: "tag" field first (INTEGER), then "field0", "field1", etc. List fields = new ArrayList<>(); + + // Add "tag" field (INTEGER) to indicate which union member is active + fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true))); + + // Add fields for each possible type in the union for (int i = 0; i < memberTypes.size(); i++) { CoralDataType fieldType = convert(memberTypes.get(i)); fields.add(StructField.of("field" + i, fieldType)); diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java b/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java index e7aff9806..8d0160cc2 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java @@ -34,6 +34,9 @@ public enum CoralTypeKind { // Binary types BINARY, + // Special types + NULL, + // Complex types ARRAY, MAP, diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java index dd783cd4c..6b571c8f5 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java @@ -36,7 +36,13 @@ public static RelDataType convert(CoralDataType type, RelDataTypeFactory factory relType = convertPrimitive((PrimitiveType) type, factory); } else if (type instanceof TimestampType) { TimestampType ts = (TimestampType) type; - relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision()); + // Handle unspecified precision (Hive compatibility) + if (ts.hasPrecision()) { + relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision()); + } else { + // No precision specified - matches TypeConverter behavior + relType = factory.createSqlType(SqlTypeName.TIMESTAMP); + } } else if (type instanceof DecimalType) { DecimalType dec = (DecimalType) type; relType = factory.createSqlType(SqlTypeName.DECIMAL, dec.getPrecision(), dec.getScale()); @@ -107,6 +113,8 @@ private static RelDataType convertPrimitive(PrimitiveType prim, RelDataTypeFacto return factory.createSqlType(SqlTypeName.TIME); case BINARY: return factory.createSqlType(SqlTypeName.BINARY); + case NULL: + return factory.createSqlType(SqlTypeName.NULL); default: // Fallback for unsupported primitive types return factory.createSqlType(SqlTypeName.ANY); diff --git a/coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java b/coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java index cb9f3f5b6..e875e7fed 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java @@ -12,23 +12,28 @@ * Represents a TIMESTAMP type with fractional second precision in the Coral type system. * * Precision indicates the number of fractional digits of seconds, e.g.: + * - -1: unspecified (PRECISION_NOT_SPECIFIED, for Hive compatibility) * - 0: seconds * - 3: milliseconds * - 6: microseconds * - 9: nanoseconds */ public final class TimestampType implements CoralDataType { + /** Constant for unspecified precision (matches Calcite's RelDataType.PRECISION_NOT_SPECIFIED) */ + public static final int PRECISION_NOT_SPECIFIED = -1; + private final int precision; private final boolean nullable; /** * Create a TIMESTAMP type with the given precision and nullability. - * @param precision fractional second precision (0-9) + * @param precision fractional second precision (-1 for unspecified, or 0-9) * @param nullable whether this type allows null values */ public static TimestampType of(int precision, boolean nullable) { - if (precision < 0 || precision > 9) { - throw new IllegalArgumentException("Timestamp precision must be in range [0, 9], got: " + precision); + if (precision != PRECISION_NOT_SPECIFIED && (precision < 0 || precision > 9)) { + throw new IllegalArgumentException( + "Timestamp precision must be -1 (unspecified) or in range [0, 9], got: " + precision); } return new TimestampType(precision, nullable); } @@ -39,12 +44,19 @@ private TimestampType(int precision, boolean nullable) { } /** - * @return the fractional second precision (0-9) + * @return the fractional second precision (-1 for unspecified, or 0-9) */ public int getPrecision() { return precision; } + /** + * @return true if precision is explicitly specified, false if unspecified + */ + public boolean hasPrecision() { + return precision != PRECISION_NOT_SPECIFIED; + } + @Override public CoralTypeKind getKind() { return CoralTypeKind.TIMESTAMP; @@ -72,6 +84,7 @@ public int hashCode() { @Override public String toString() { - return "TIMESTAMP(" + precision + ")" + (nullable ? " NULL" : " NOT NULL"); + String precisionStr = precision == PRECISION_NOT_SPECIFIED ? "" : "(" + precision + ")"; + return "TIMESTAMP" + precisionStr + (nullable ? " NULL" : " NOT NULL"); } } diff --git a/coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java b/coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java index be9348542..39ebe91e1 100644 --- a/coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java +++ b/coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java @@ -23,6 +23,9 @@ public class HiveToCoralTypeConverterTest { @Test public void testPrimitiveTypes() { + // Test void/null type + testPrimitiveType(TypeInfoFactory.voidTypeInfo, CoralTypeKind.NULL, true, null, null); + // Test boolean testPrimitiveType(TypeInfoFactory.booleanTypeInfo, CoralTypeKind.BOOLEAN, true, null, null); @@ -39,7 +42,8 @@ public void testPrimitiveTypes() { // Test date/time types testPrimitiveType(TypeInfoFactory.dateTypeInfo, CoralTypeKind.DATE, true, null, null); - testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, 3, null); + // TIMESTAMP has PRECISION_NOT_SPECIFIED (-1) to match legacy TypeConverter behavior + testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, -1, null); // Test binary testPrimitiveType(TypeInfoFactory.binaryTypeInfo, CoralTypeKind.BINARY, true, null, null); @@ -193,13 +197,16 @@ public void testUnionType() { assertTrue(result instanceof StructType); StructType structType = (StructType) result; - // Union is converted to a struct with fields for each possible type + // Union is converted to a struct with "tag" field first, then fields for each possible type + // This matches the Trino union representation: {tag, field0, field1, ...} List fields = structType.getFields(); - assertEquals(fields.size(), 2); - assertEquals(fields.get(0).getName(), "field0"); + assertEquals(fields.size(), 3); // tag + 2 union member fields + assertEquals(fields.get(0).getName(), "tag"); assertEquals(fields.get(0).getType().getKind(), CoralTypeKind.INT); - assertEquals(fields.get(1).getName(), "field1"); - assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.STRING); + assertEquals(fields.get(1).getName(), "field0"); + assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.INT); + assertEquals(fields.get(2).getName(), "field1"); + assertEquals(fields.get(2).getType().getKind(), CoralTypeKind.STRING); } @Test(expectedExceptions = IllegalArgumentException.class) diff --git a/coral-common/src/test/java/com/linkedin/coral/common/types/CoralTypeSystemTest.java b/coral-common/src/test/java/com/linkedin/coral/common/types/CoralTypeSystemTest.java index 294fa1f57..ce1f47b02 100644 --- a/coral-common/src/test/java/com/linkedin/coral/common/types/CoralTypeSystemTest.java +++ b/coral-common/src/test/java/com/linkedin/coral/common/types/CoralTypeSystemTest.java @@ -203,7 +203,6 @@ public void testTypeEquality() { PrimitiveType intType1 = PrimitiveType.of(CoralTypeKind.INT, false); PrimitiveType intType2 = PrimitiveType.of(CoralTypeKind.INT, false); PrimitiveType nullableIntType = PrimitiveType.of(CoralTypeKind.INT, true); - assertEquals(intType1, intType2); assertNotEquals(intType1, nullableIntType); assertEquals(intType1.hashCode(), intType2.hashCode());