Skip to content

Commit 0c4b97c

Browse files
committed
enhance coraltype system
1 parent 5ea6754 commit 0c4b97c

File tree

6 files changed

+153
-18
lines changed

6 files changed

+153
-18
lines changed

coral-common/src/main/java/com/linkedin/coral/common/HiveTable.java

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
import com.google.common.base.Splitter;
1717
import com.google.common.collect.ImmutableList;
1818
import com.google.common.collect.Iterables;
19-
19+
import com.linkedin.coral.common.types.CoralDataType;
20+
import com.linkedin.coral.common.types.CoralTypeToRelDataTypeConverter;
21+
import com.linkedin.coral.common.types.StructField;
22+
import com.linkedin.coral.common.types.StructType;
2023
import org.apache.calcite.DataContext;
2124
import org.apache.calcite.config.CalciteConnectionConfig;
2225
import org.apache.calcite.linq4j.Enumerable;
@@ -134,12 +137,70 @@ private void checkDaliTable() {
134137
// Preconditions.checkState(isDaliTable());
135138
}
136139

140+
/**
141+
* Returns the row type (schema) for this table.
142+
*
143+
* Two conversion paths are supported:
144+
* 1. Two-stage (preferred): Hive → Coral → Calcite
145+
* 2. Direct (legacy): Hive → Calcite (for backward compatibility)
146+
*
147+
* The two-stage conversion enables using Coral type system as an intermediary,
148+
* allowing better type system unification and testing.
149+
*
150+
* @param typeFactory Calcite type factory
151+
* @return RelDataType representing the table schema
152+
*/
137153
@Override
138154
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
155+
// Use two-stage conversion if HiveCoralTable is available
156+
try {
157+
return getRowTypeViaCoralTypeSystem(typeFactory);
158+
} catch (Exception e) {
159+
// Fall back to direct conversion if two-stage conversion fails
160+
LOG.warn("Two-stage type conversion failed for table {}, falling back to direct conversion. Error: {}",
161+
hiveTable.getTableName(), e.getMessage(), e);
162+
return getRowTypeDirectConversion(typeFactory);
163+
}
164+
}
165+
166+
/**
167+
* Two-stage conversion: Hive → Coral → Calcite.
168+
* This is the preferred path when using CoralCatalog.
169+
*/
170+
private RelDataType getRowTypeViaCoralTypeSystem(RelDataTypeFactory typeFactory) {
171+
// Stage 1: Hive → Coral
172+
CoralDataType coralSchema = getCoralSchema();
173+
174+
// Stage 2: Coral → Calcite
175+
if (!(coralSchema instanceof StructType)) {
176+
throw new IllegalStateException("Expected StructType from getCoralSchema(), got: " + coralSchema.getClass());
177+
}
178+
179+
StructType structType = (StructType) coralSchema;
180+
List<StructField> fields = structType.getFields();
181+
182+
List<RelDataType> fieldTypes = new ArrayList<>(fields.size());
183+
List<String> fieldNames = new ArrayList<>(fields.size());
184+
185+
for (StructField field : fields) {
186+
fieldNames.add(field.getName());
187+
RelDataType fieldType = CoralTypeToRelDataTypeConverter.convert(field.getType(), typeFactory);
188+
fieldTypes.add(fieldType);
189+
}
190+
191+
return typeFactory.createStructType(fieldTypes, fieldNames);
192+
}
193+
194+
/**
195+
* Direct conversion: Hive → Calcite.
196+
* This is the legacy path for backward compatibility.
197+
*/
198+
private RelDataType getRowTypeDirectConversion(RelDataTypeFactory typeFactory) {
139199
final List<FieldSchema> cols = getColumns();
140200
final List<RelDataType> fieldTypes = new ArrayList<>(cols.size());
141201
final List<String> fieldNames = new ArrayList<>(cols.size());
142202
final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());
203+
143204
allCols.forEach(col -> {
144205
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
145206
final RelDataType relType = TypeConverter.convert(typeInfo, typeFactory);
@@ -153,6 +214,41 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
153214
return typeFactory.createStructType(fieldTypes, fieldNames);
154215
}
155216

217+
/**
218+
* Returns the table schema in Coral type system.
219+
* This includes both regular columns (from StorageDescriptor) and partition columns.
220+
* Converts Hive TypeInfo to Coral types using HiveToCoralTypeConverter.
221+
*
222+
* @return StructType representing the full table schema (columns + partitions)
223+
*/
224+
@Override
225+
public CoralDataType getCoralSchema() {
226+
final List<FieldSchema> cols = getColumns();
227+
final List<StructField> fields = new ArrayList<>();
228+
final List<String> fieldNames = new ArrayList<>();
229+
230+
// Combine regular columns and partition keys (same as HiveTable.getRowType)
231+
final Iterable<FieldSchema> allCols = Iterables.concat(cols, hiveTable.getPartitionKeys());
232+
233+
for (FieldSchema col : allCols) {
234+
final String colName = col.getName();
235+
236+
// Skip duplicate columns (partition keys might overlap with regular columns)
237+
if (!fieldNames.contains(colName)) {
238+
// Convert Hive type string to TypeInfo, then to CoralDataType
239+
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
240+
final CoralDataType coralType = HiveToCoralTypeConverter.convert(typeInfo);
241+
242+
fields.add(StructField.of(colName, coralType));
243+
fieldNames.add(colName);
244+
}
245+
}
246+
247+
// Return struct type representing the table schema
248+
// Table-level struct is nullable (Hive convention)
249+
return StructType.of(fields, true);
250+
}
251+
156252
private List<FieldSchema> getColumns() {
157253
StorageDescriptor sd = hiveTable.getSd();
158254
String serDeLib = getSerializationLib();

coral-common/src/main/java/com/linkedin/coral/common/HiveToCoralTypeConverter.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,9 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
7272
case DATE:
7373
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
7474
case TIMESTAMP:
75-
// Default to microsecond precision (6)
76-
return TimestampType.of(3, nullable);
75+
// Hive TIMESTAMP has no explicit precision (matches TypeConverter behavior)
76+
// Use PRECISION_NOT_SPECIFIED (-1) to match Calcite's behavior
77+
return TimestampType.of(TimestampType.PRECISION_NOT_SPECIFIED, nullable);
7778
case BINARY:
7879
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
7980
case DECIMAL:
@@ -86,6 +87,7 @@ private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
8687
CharTypeInfo charType = (CharTypeInfo) type;
8788
return CharType.of(charType.getLength(), nullable);
8889
case VOID:
90+
return PrimitiveType.of(CoralTypeKind.NULL, true);
8991
case UNKNOWN:
9092
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
9193
default:
@@ -118,12 +120,18 @@ private static CoralDataType convertStruct(StructTypeInfo structType) {
118120
}
119121

120122
private static CoralDataType convertUnion(UnionTypeInfo unionType) {
121-
// For UNION types, we'll create a struct with all possible fields
122-
// This is similar to how some systems handle union types
123+
// For UNION types, create a struct conforming to Trino's union representation
124+
// Schema: {tag, field0, field1, ..., fieldN}
125+
// See: https://github.com/trinodb/trino/pull/3483
123126
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();
124127

125-
// Create fields for each possible type in the union
128+
// Create fields: "tag" field first (INTEGER), then "field0", "field1", etc.
126129
List<StructField> fields = new ArrayList<>();
130+
131+
// Add "tag" field (INTEGER) to indicate which union member is active
132+
fields.add(StructField.of("tag", PrimitiveType.of(CoralTypeKind.INT, true)));
133+
134+
// Add fields for each possible type in the union
127135
for (int i = 0; i < memberTypes.size(); i++) {
128136
CoralDataType fieldType = convert(memberTypes.get(i));
129137
fields.add(StructField.of("field" + i, fieldType));

coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeKind.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ public enum CoralTypeKind {
3434
// Binary types
3535
BINARY,
3636

37+
// Special types
38+
NULL,
39+
3740
// Complex types
3841
ARRAY,
3942
MAP,

coral-common/src/main/java/com/linkedin/coral/common/types/CoralTypeToRelDataTypeConverter.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,13 @@ public static RelDataType convert(CoralDataType type, RelDataTypeFactory factory
3636
relType = convertPrimitive((PrimitiveType) type, factory);
3737
} else if (type instanceof TimestampType) {
3838
TimestampType ts = (TimestampType) type;
39-
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
39+
// Handle unspecified precision (Hive compatibility)
40+
if (ts.hasPrecision()) {
41+
relType = factory.createSqlType(SqlTypeName.TIMESTAMP, ts.getPrecision());
42+
} else {
43+
// No precision specified - matches TypeConverter behavior
44+
relType = factory.createSqlType(SqlTypeName.TIMESTAMP);
45+
}
4046
} else if (type instanceof DecimalType) {
4147
DecimalType dec = (DecimalType) type;
4248
relType = factory.createSqlType(SqlTypeName.DECIMAL, dec.getPrecision(), dec.getScale());
@@ -107,6 +113,8 @@ private static RelDataType convertPrimitive(PrimitiveType prim, RelDataTypeFacto
107113
return factory.createSqlType(SqlTypeName.TIME);
108114
case BINARY:
109115
return factory.createSqlType(SqlTypeName.BINARY);
116+
case NULL:
117+
return factory.createSqlType(SqlTypeName.NULL);
110118
default:
111119
// Fallback for unsupported primitive types
112120
return factory.createSqlType(SqlTypeName.ANY);

coral-common/src/main/java/com/linkedin/coral/common/types/TimestampType.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,28 @@
1212
* Represents a TIMESTAMP type with fractional second precision in the Coral type system.
1313
*
1414
* Precision indicates the number of fractional digits of seconds, e.g.:
15+
* - -1: unspecified (PRECISION_NOT_SPECIFIED, for Hive compatibility)
1516
* - 0: seconds
1617
* - 3: milliseconds
1718
* - 6: microseconds
1819
* - 9: nanoseconds
1920
*/
2021
public final class TimestampType implements CoralDataType {
22+
/** Constant for unspecified precision (matches Calcite's RelDataType.PRECISION_NOT_SPECIFIED) */
23+
public static final int PRECISION_NOT_SPECIFIED = -1;
24+
2125
private final int precision;
2226
private final boolean nullable;
2327

2428
/**
2529
* Create a TIMESTAMP type with the given precision and nullability.
26-
* @param precision fractional second precision (0-9)
30+
* @param precision fractional second precision (-1 for unspecified, or 0-9)
2731
* @param nullable whether this type allows null values
2832
*/
2933
public static TimestampType of(int precision, boolean nullable) {
30-
if (precision < 0 || precision > 9) {
31-
throw new IllegalArgumentException("Timestamp precision must be in range [0, 9], got: " + precision);
34+
if (precision != PRECISION_NOT_SPECIFIED && (precision < 0 || precision > 9)) {
35+
throw new IllegalArgumentException(
36+
"Timestamp precision must be -1 (unspecified) or in range [0, 9], got: " + precision);
3237
}
3338
return new TimestampType(precision, nullable);
3439
}
@@ -39,12 +44,19 @@ private TimestampType(int precision, boolean nullable) {
3944
}
4045

4146
/**
42-
* @return the fractional second precision (0-9)
47+
* @return the fractional second precision (-1 for unspecified, or 0-9)
4348
*/
4449
public int getPrecision() {
4550
return precision;
4651
}
4752

53+
/**
54+
* @return true if precision is explicitly specified, false if unspecified
55+
*/
56+
public boolean hasPrecision() {
57+
return precision != PRECISION_NOT_SPECIFIED;
58+
}
59+
4860
@Override
4961
public CoralTypeKind getKind() {
5062
return CoralTypeKind.TIMESTAMP;
@@ -72,6 +84,7 @@ public int hashCode() {
7284

7385
@Override
7486
public String toString() {
75-
return "TIMESTAMP(" + precision + ")" + (nullable ? " NULL" : " NOT NULL");
87+
String precisionStr = precision == PRECISION_NOT_SPECIFIED ? "" : "(" + precision + ")";
88+
return "TIMESTAMP" + precisionStr + (nullable ? " NULL" : " NOT NULL");
7689
}
7790
}

coral-common/src/test/java/com/linkedin/coral/common/HiveToCoralTypeConverterTest.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ public class HiveToCoralTypeConverterTest {
2323

2424
@Test
2525
public void testPrimitiveTypes() {
26+
// Test void/null type
27+
testPrimitiveType(TypeInfoFactory.voidTypeInfo, CoralTypeKind.NULL, true, null, null);
28+
2629
// Test boolean
2730
testPrimitiveType(TypeInfoFactory.booleanTypeInfo, CoralTypeKind.BOOLEAN, true, null, null);
2831

@@ -39,7 +42,8 @@ public void testPrimitiveTypes() {
3942

4043
// Test date/time types
4144
testPrimitiveType(TypeInfoFactory.dateTypeInfo, CoralTypeKind.DATE, true, null, null);
42-
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, 3, null);
45+
// TIMESTAMP has PRECISION_NOT_SPECIFIED (-1) to match legacy TypeConverter behavior
46+
testPrimitiveType(TypeInfoFactory.timestampTypeInfo, CoralTypeKind.TIMESTAMP, true, -1, null);
4347

4448
// Test binary
4549
testPrimitiveType(TypeInfoFactory.binaryTypeInfo, CoralTypeKind.BINARY, true, null, null);
@@ -193,13 +197,16 @@ public void testUnionType() {
193197
assertTrue(result instanceof StructType);
194198
StructType structType = (StructType) result;
195199

196-
// Union is converted to a struct with fields for each possible type
200+
// Union is converted to a struct with "tag" field first, then fields for each possible type
201+
// This matches the Trino union representation: {tag, field0, field1, ...}
197202
List<StructField> fields = structType.getFields();
198-
assertEquals(fields.size(), 2);
199-
assertEquals(fields.get(0).getName(), "field0");
203+
assertEquals(fields.size(), 3); // tag + 2 union member fields
204+
assertEquals(fields.get(0).getName(), "tag");
200205
assertEquals(fields.get(0).getType().getKind(), CoralTypeKind.INT);
201-
assertEquals(fields.get(1).getName(), "field1");
202-
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.STRING);
206+
assertEquals(fields.get(1).getName(), "field0");
207+
assertEquals(fields.get(1).getType().getKind(), CoralTypeKind.INT);
208+
assertEquals(fields.get(2).getName(), "field1");
209+
assertEquals(fields.get(2).getType().getKind(), CoralTypeKind.STRING);
203210
}
204211

205212
@Test(expectedExceptions = IllegalArgumentException.class)

0 commit comments

Comments
 (0)