Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 110 additions & 1 deletion coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -39,6 +39,11 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.linkedin.coral.common.types.CoralDataType;
import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter;
import com.linkedin.coral.common.types.StructField;
import com.linkedin.coral.common.types.StructType;


/**
* Adaptor class from Hive {@link org.apache.hadoop.hive.metastore.api.Table} representation to
Expand Down Expand Up @@ -134,12 +139,82 @@ private void checkDaliTable() {
// Preconditions.checkState(isDaliTable());
}

/**
* Returns the row type (schema) for this table.
*
* Current behavior (validation/shadow mode):
* - Always returns the legacy Hive → Calcite direct conversion
* - Validates against the new Hive → Coral → Calcite two-stage conversion
* - Logs warnings if conversions don't match or if validation fails
*
* This allows safe validation of the new conversion path in production
* before switching to use it as the primary path.
*
* @param typeFactory Calcite type factory
* @return RelDataType representing the table schema
*/
/**
 * Returns the row type (schema) for this table.
 *
 * <p>Operates in validation ("shadow") mode: the legacy Hive → Calcite direct
 * conversion is always what callers receive, while the newer Hive → Coral →
 * Calcite two-stage conversion is computed alongside it purely for comparison.
 * A mismatch or a failure in the new path is logged as a warning and otherwise
 * ignored, so the new converter can be vetted in production with zero impact.
 *
 * @param typeFactory Calcite type factory
 * @return RelDataType representing the table schema (legacy conversion result)
 */
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
  // Production path: legacy direct conversion is what we always return.
  final RelDataType legacyType = getRowTypeFromHiveType(typeFactory);

  try {
    // Shadow path: two-stage Coral conversion, computed only to compare.
    final RelDataType shadowType = getRowTypeFromCoralType(typeFactory);
    final boolean conversionsAgree = legacyType.equals(shadowType);
    if (!conversionsAgree) {
      LOG.warn("Hive and Coral type conversion mismatch for table {}.{}. Hive: {}, Coral: {}", hiveTable.getDbName(),
          hiveTable.getTableName(), legacyType, shadowType);
    }
  } catch (Exception e) {
    // Shadow-path failures must never affect callers; log and fall through.
    LOG.warn("Coral type validation failed for table {}.{}. Proceeding with Hive type. Error: {}",
        hiveTable.getDbName(), hiveTable.getTableName(), e.getMessage(), e);
  }

  return legacyType;
}

/**
* Two-stage conversion: Hive → Coral → Calcite.
* This is the preferred path when using CoralCatalog.
*/
/**
 * Two-stage conversion: Hive → Coral → Calcite.
 * This is the preferred path when using CoralCatalog.
 *
 * @param typeFactory Calcite type factory used to materialize the struct type
 * @return RelDataType built from the Coral schema
 * @throws IllegalStateException if {@link #getCoralSchema()} does not yield a StructType
 */
private RelDataType getRowTypeFromCoralType(RelDataTypeFactory typeFactory) {
  // Stage 1: Hive → Coral. The table schema must surface as a struct.
  final CoralDataType schema = getCoralSchema();
  if (!(schema instanceof StructType)) {
    throw new IllegalStateException("Expected StructType from getCoralSchema(), got: " + schema.getClass());
  }

  // Stage 2: Coral → Calcite, field by field.
  final List<StructField> members = ((StructType) schema).getFields();
  final List<String> names = new ArrayList<>(members.size());
  final List<RelDataType> types = new ArrayList<>(members.size());
  for (int i = 0; i < members.size(); i++) {
    final StructField member = members.get(i);
    names.add(member.getName());
    types.add(CoralTypeToRelDataTypeConverter.convert(member.getType(), typeFactory));
  }

  return typeFactory.createStructType(types, names);
}

/**
* Direct conversion: Hive → Calcite.
* This is the legacy path for backward compatibility.
*/
private RelDataType getRowTypeFromHiveType(RelDataTypeFactory typeFactory) {
final List<FieldSchema> cols = getColumns();
final List<RelDataType> fieldTypes = new ArrayList<>(cols.size());
final List<String> fieldNames = new ArrayList<>(cols.size());
final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());

allCols.forEach(col -> {
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory);
Expand All @@ -153,6 +228,40 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
return typeFactory.createStructType(fieldTypes, fieldNames);
}

/**
* Returns the table schema in Coral type system.
* This includes both regular columns (from StorageDescriptor) and partition columns.
* Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter.
*
* @return StructType representing the full table schema (columns + partitions)
*/
/**
 * Returns the table schema in Coral type system.
 * This includes both regular columns (from StorageDescriptor) and partition columns.
 * Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter.
 *
 * @return StructType representing the full table schema (columns + partitions)
 */
public CoralDataType getCoralSchema() {
  final List<FieldSchema> cols = getColumns();
  final List<StructField> fields = new ArrayList<>();
  // O(1) duplicate detection; the previous List#contains scan made this loop O(n^2)
  // for wide tables. Fully qualified to avoid touching the import block.
  final java.util.Set<String> seenNames = new java.util.HashSet<>();

  // Combine regular columns and partition keys (same as HiveTable.getRowType)
  final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());

  for (FieldSchema col : allCols) {
    final String colName = col.getName();

    // Skip duplicate columns (partition keys might overlap with regular columns);
    // Set#add returns false when the name was already seen, so first occurrence wins.
    if (seenNames.add(colName)) {
      // Convert Hive type string to TypeInfo, then to CoralDataType
      final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
      final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo);

      fields.add(StructField.of(colName, coralType));
    }
  }

  // Return struct type representing the table schema
  // Table-level struct is nullable (Hive convention)
  return StructType.of(fields, true);
}

private List<FieldSchema> getColumns() {
StorageDescriptor sd = hiveTable.getSd();
String serDeLib = getSerializationLib();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
case DATE:
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
case TIMESTAMP:
// Default to microsecond precision (6)
return TimestampType.of(3, nullable);
// Hive TIMESTAMP has no explicit precision (matches TypeConverter behavior)
// Use PRECISION_NOT_SPECIFIED (-1) to match Calcite's behavior
return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable);
case BINARY:
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
case DECIMAL:
Expand All @@ -86,6 +87,7 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
CharTypeInfo charType = (CharTypeInfo) type;
return CharType.of(charType.getLength(), nullable);
case VOID:
return PrimitiveType.of(CoralTypeKind.NULL, true);
case UNKNOWN:
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
default:
Expand Down Expand Up @@ -118,12 +120,18 @@ private static CoralDataType convertStruct(StructTypeInfo structType) {
}

private static CoralDataType convertUnion(UnionTypeInfo unionType) {
// For UNION types, we'll create a struct with all possible fields
// This is similar to how some systems handle union types
// For UNION types, create a struct conforming to Trino's union representation
// Schema: {tag, field0, field1, ..., fieldN}
// See: https://github.com/trinodb/trino/pull/3483
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();

// Create fields for each possible type in the union
// Create fields: "tag" field first (INTEGER), then "field0", "field1", etc.
List<StructField> fields = new ArrayList<>();

// Add "tag" field (INTEGER) to indicate which union member is active
fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true)));

// Add fields for each possible type in the union
for (int i = 0; i < memberTypes.size(); i++) {
CoralDataType fieldType = convert(memberTypes.get(i));
fields.add(StructField.of("field" + i, fieldType));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ public enum CoralTypeKind {
// Binary types
BINARY,

// Special types
NULL,

// Complex types
ARRAY,
MAP,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ public static RelDataType convert(CoralDataType type, RelDataTypeFactory factory
relType = convertPrimitive((PrimitiveType) type, factory);
} else if (type instanceof TimestampType) {
TimestampType ts = (TimestampType) type;
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
// Handle unspecified precision (Hive compatibility)
if (ts.hasPrecision()) {
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
} else {
// No precision specified - matches TypeConverter behavior
relType = factory.createSqlType(SqlTypeName.TIMESTAMP);
}
} else if (type instanceof DecimalType) {
DecimalType dec = (DecimalType) type;
relType = factory.createSqlType(SqlTypeName.DECIMAL, dec.getPrecision(), dec.getScale());
Expand Down Expand Up @@ -107,6 +113,8 @@ private static RelDataType convertPrimitive(PrimitiveType prim, RelDataTypeFacto
return factory.createSqlType(SqlTypeName.TIME);
case BINARY:
return factory.createSqlType(SqlTypeName.BINARY);
case NULL:
return factory.createSqlType(SqlTypeName.NULL);
default:
// Fallback for unsupported primitive types
return factory.createSqlType(SqlTypeName.ANY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,28 @@
* Represents a TIMESTAMP type with fractional second precision in the Coral type system.
*
* Precision indicates the number of fractional digits of seconds, e.g.:
* - -1: unspecified (PRECISION_NOT_SPECIFIED, for Hive compatibility)
* - 0: seconds
* - 3: milliseconds
* - 6: microseconds
* - 9: nanoseconds
*/
public final class TimestampType implements CoralDataType {
/** Constant for unspecified precision (matches Calcite's RelDataType.PRECISION_NOT_SPECIFIED) */
public static final int PRECISION_NOT_SPECIFIED = -1;

private final int precision;
private final boolean nullable;

/**
* Create a TIMESTAMP type with the given precision and nullability.
* @param precision fractional second precision (0-9)
* @param precision fractional second precision (-1 for unspecified, or 0-9)
* @param nullable whether this type allows null values
*/
public static TimestampType of(int precision, boolean nullable) {
if (precision < 0 || precision > 9) {
throw new IllegalArgumentException("Timestamp precision must be in range [0, 9], got: " + precision);
if (precision != PRECISION_NOT_SPECIFIED && (precision < 0 || precision > 9)) {
throw new IllegalArgumentException(
"Timestamp precision must be -1 (unspecified) or in range [0, 9], got: " + precision);
}
return new TimestampType(precision, nullable);
}
Expand All @@ -39,12 +44,19 @@ private TimestampType(int precision, boolean nullable) {
}

/**
* @return the fractional second precision (0-9)
* @return the fractional second precision (-1 for unspecified, or 0-9)
*/
public int getPrecision() {
return precision;
}

/**
 * Tells whether this timestamp carries an explicit fractional-second precision.
 *
 * @return true if precision is explicitly specified, false if unspecified
 */
public boolean hasPrecision() {
  return PRECISION_NOT_SPECIFIED != precision;
}

@Override
public CoralTypeKind getKind() {
return CoralTypeKind.TIMESTAMP;
Expand Down Expand Up @@ -72,6 +84,7 @@ public int hashCode() {

@Override
public String toString() {
return "TIMESTAMP(" + precision + ")" + (nullable ? " NULL" : " NOT NULL");
String precisionStr = precision == PRECISION_NOT_SPECIFIED ? "" : "(" + precision + ")";
return "TIMESTAMP" + precisionStr + (nullable ? " NULL" : " NOT NULL");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ public class HiveToCoralTypeConverterTest {

@Test
public void testPrimitiveTypes() {
// Test void/null type
testPrimitiveType(TypeInfoFactory.voidTypeInfo, CoralTypeKind.NULL, true, null, null);

// Test boolean
testPrimitiveType(TypeInfoFactory.booleanTypeInfo, CoralTypeKind.BOOLEAN, true, null, null);

Expand All @@ -39,7 +42,8 @@ public void testPrimitiveTypes() {

// Test date/time types
testPrimitiveType(TypeInfoFactory.dateTypeInfo, CoralTypeKind.DATE, true, null, null);
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, 3, null);
// TIMESTAMP has PRECISION_NOT_SPECIFIED (-1) to match legacy TypeConverter behavior
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, -1, null);

// Test binary
testPrimitiveType(TypeInfoFactory.binaryTypeInfo, CoralTypeKind.BINARY, true, null, null);
Expand Down Expand Up @@ -193,13 +197,16 @@ public void testUnionType() {
assertTrue(result instanceof StructType);
StructType structType = (StructType) result;

// Union is converted to a struct with fields for each possible type
// Union is converted to a struct with "tag" field first, then fields for each possible type
// This matches the Trino union representation: {tag, field0, field1, ...}
List<StructField> fields = structType.getFields();
assertEquals(fields.size(), 2);
assertEquals(fields.get(0).getName(), "field0");
assertEquals(fields.size(), 3); // tag + 2 union member fields
assertEquals(fields.get(0).getName(), "tag");
assertEquals(fields.get(0).getType().getKind(), CoralTypeKind.INT);
assertEquals(fields.get(1).getName(), "field1");
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.STRING);
assertEquals(fields.get(1).getName(), "field0");
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.INT);
assertEquals(fields.get(2).getName(), "field1");
assertEquals(fields.get(2).getType().getKind(), CoralTypeKind.STRING);
}

@Test(expectedExceptions = IllegalArgumentException.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ public void testTypeEquality() {
PrimitiveType intType1 = PrimitiveType.of(CoralTypeKind.INT, false);
PrimitiveType intType2 = PrimitiveType.of(CoralTypeKind.INT, false);
PrimitiveType nullableIntType = PrimitiveType.of(CoralTypeKind.INT, true);

assertEquals(intType1, intType2);
assertNotEquals(intType1, nullableIntType);
assertEquals(intType1.hashCode(), intType2.hashCode());
Expand Down