Skip to content

Commit a4de642

Browse files
committed
Add Hive to Coral type conversion
1 parent 06b7386 commit a4de642

File tree

2 files changed

+403
-0
lines changed

2 files changed

+403
-0
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
11+
import org.apache.hadoop.hive.serde2.typeinfo.*;
12+
13+
import com.linkedin.coral.common.types.*;
14+
15+
16+
/**
17+
* Converts Hive TypeInfo objects to Coral data types.
18+
* This enables integration between Hive's type system and Coral's type system.
19+
*/
20+
public final class HiveToCoralTypeConverter {
21+
22+
private HiveToCoralTypeConverter() {
23+
// Utility class - prevent instantiation
24+
}
25+
26+
/**
27+
* Converts a Hive TypeInfo to a Coral data type.
28+
* @param typeInfo the Hive type to convert
29+
* @return the corresponding Coral data type
30+
*/
31+
public static CoralDataType convert(TypeInfo typeInfo) {
32+
if (typeInfo == null) {
33+
throw new IllegalArgumentException("TypeInfo cannot be null");
34+
}
35+
36+
switch (typeInfo.getCategory()) {
37+
case PRIMITIVE:
38+
return convertPrimitive((PrimitiveTypeInfo) typeInfo);
39+
case LIST:
40+
return convertList((ListTypeInfo) typeInfo);
41+
case MAP:
42+
return convertMap((MapTypeInfo) typeInfo);
43+
case STRUCT:
44+
return convertStruct((StructTypeInfo) typeInfo);
45+
case UNION:
46+
return convertUnion((UnionTypeInfo) typeInfo);
47+
default:
48+
throw new UnsupportedOperationException("Unsupported type category: " + typeInfo.getCategory());
49+
}
50+
}
51+
52+
private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
53+
boolean nullable = true; // Hive types are generally nullable
54+
55+
switch (type.getPrimitiveCategory()) {
56+
case BOOLEAN:
57+
return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable);
58+
case BYTE:
59+
return PrimitiveType.of(CoralTypeKind.TINYINT, nullable);
60+
case SHORT:
61+
return PrimitiveType.of(CoralTypeKind.SMALLINT, nullable);
62+
case INT:
63+
return PrimitiveType.of(CoralTypeKind.INT, nullable);
64+
case LONG:
65+
return PrimitiveType.of(CoralTypeKind.BIGINT, nullable);
66+
case FLOAT:
67+
return PrimitiveType.of(CoralTypeKind.FLOAT, nullable);
68+
case DOUBLE:
69+
return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable);
70+
case STRING:
71+
return PrimitiveType.of(CoralTypeKind.STRING, nullable);
72+
case DATE:
73+
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
74+
case TIMESTAMP:
75+
// Default to microsecond precision (6)
76+
return TimestampType.of(3, nullable);
77+
case BINARY:
78+
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
79+
case DECIMAL:
80+
DecimalTypeInfo decimalType = (DecimalTypeInfo) type;
81+
return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable);
82+
case VARCHAR:
83+
VarcharTypeInfo varcharType = (VarcharTypeInfo) type;
84+
return VarcharType.of(varcharType.getLength(), nullable);
85+
case CHAR:
86+
CharTypeInfo charType = (CharTypeInfo) type;
87+
return CharType.of(charType.getLength(), nullable);
88+
case VOID:
89+
case UNKNOWN:
90+
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
91+
default:
92+
throw new UnsupportedOperationException("Unsupported primitive type: " + type.getPrimitiveCategory());
93+
}
94+
}
95+
96+
private static CoralDataType convertList(ListTypeInfo listType) {
97+
CoralDataType elementType = convert(listType.getListElementTypeInfo());
98+
return ArrayType.of(elementType, true); // Lists are nullable in Hive
99+
}
100+
101+
private static CoralDataType convertMap(MapTypeInfo mapType) {
102+
CoralDataType keyType = convert(mapType.getMapKeyTypeInfo());
103+
CoralDataType valueType = convert(mapType.getMapValueTypeInfo());
104+
return MapType.of(keyType, valueType, true); // Maps are nullable in Hive
105+
}
106+
107+
private static CoralDataType convertStruct(StructTypeInfo structType) {
108+
List<String> fieldNames = structType.getAllStructFieldNames();
109+
List<TypeInfo> fieldTypeInfos = structType.getAllStructFieldTypeInfos();
110+
111+
List<StructField> fields = new ArrayList<>();
112+
for (int i = 0; i < fieldTypeInfos.size(); i++) {
113+
CoralDataType fieldType = convert(fieldTypeInfos.get(i));
114+
fields.add(StructField.of(fieldNames.get(i), fieldType));
115+
}
116+
117+
return StructType.of(fields, true); // Structs are nullable in Hive
118+
}
119+
120+
private static CoralDataType convertUnion(UnionTypeInfo unionType) {
121+
// For UNION types, we'll create a struct with all possible fields
122+
// This is similar to how some systems handle union types
123+
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();
124+
125+
// Create fields for each possible type in the union
126+
List<StructField> fields = new ArrayList<>();
127+
for (int i = 0; i < memberTypes.size(); i++) {
128+
CoralDataType fieldType = convert(memberTypes.get(i));
129+
fields.add(StructField.of("field" + i, fieldType));
130+
}
131+
132+
return StructType.of(fields, true);
133+
}
134+
}

0 commit comments

Comments
 (0)