Skip to content

Commit ec23113

Browse files
authored
Introduce Coral type system abstraction (#558)
1 parent 9f8dfce commit ec23113

File tree

17 files changed

+1560
-10
lines changed

17 files changed

+1560
-10
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
11+
import org.apache.hadoop.hive.serde2.typeinfo.*;
12+
13+
import com.linkedin.coral.common.types.*;
14+
15+
16+
/**
17+
* Converts Hive TypeInfo objects to Coral data types.
18+
* This enables integration between Hive's type system and Coral's type system.
19+
*/
20+
public final class HiveToCoralTypeConverter {
21+
22+
private HiveToCoralTypeConverter() {
23+
// Utility class - prevent instantiation
24+
}
25+
26+
/**
27+
* Converts a Hive TypeInfo to a Coral data type.
28+
* @param typeInfo the Hive type to convert
29+
* @return the corresponding Coral data type
30+
*/
31+
public static CoralDataType convert(TypeInfo typeInfo) {
32+
if (typeInfo == null) {
33+
throw new IllegalArgumentException("TypeInfo cannot be null");
34+
}
35+
36+
switch (typeInfo.getCategory()) {
37+
case PRIMITIVE:
38+
return convertPrimitive((PrimitiveTypeInfo) typeInfo);
39+
case LIST:
40+
return convertList((ListTypeInfo) typeInfo);
41+
case MAP:
42+
return convertMap((MapTypeInfo) typeInfo);
43+
case STRUCT:
44+
return convertStruct((StructTypeInfo) typeInfo);
45+
case UNION:
46+
return convertUnion((UnionTypeInfo) typeInfo);
47+
default:
48+
throw new UnsupportedOperationException("Unsupported type category: " + typeInfo.getCategory());
49+
}
50+
}
51+
52+
private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
53+
boolean nullable = true; // Hive types are generally nullable
54+
55+
switch (type.getPrimitiveCategory()) {
56+
case BOOLEAN:
57+
return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable);
58+
case BYTE:
59+
return PrimitiveType.of(CoralTypeKind.TINYINT, nullable);
60+
case SHORT:
61+
return PrimitiveType.of(CoralTypeKind.SMALLINT, nullable);
62+
case INT:
63+
return PrimitiveType.of(CoralTypeKind.INT, nullable);
64+
case LONG:
65+
return PrimitiveType.of(CoralTypeKind.BIGINT, nullable);
66+
case FLOAT:
67+
return PrimitiveType.of(CoralTypeKind.FLOAT, nullable);
68+
case DOUBLE:
69+
return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable);
70+
case STRING:
71+
return PrimitiveType.of(CoralTypeKind.STRING, nullable);
72+
case DATE:
73+
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
74+
case TIMESTAMP:
75+
// Default to microsecond precision (6)
76+
return TimestampType.of(3, nullable);
77+
case BINARY:
78+
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
79+
case DECIMAL:
80+
DecimalTypeInfo decimalType = (DecimalTypeInfo) type;
81+
return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable);
82+
case VARCHAR:
83+
VarcharTypeInfo varcharType = (VarcharTypeInfo) type;
84+
return VarcharType.of(varcharType.getLength(), nullable);
85+
case CHAR:
86+
CharTypeInfo charType = (CharTypeInfo) type;
87+
return CharType.of(charType.getLength(), nullable);
88+
case VOID:
89+
case UNKNOWN:
90+
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
91+
default:
92+
throw new UnsupportedOperationException("Unsupported primitive type: " + type.getPrimitiveCategory());
93+
}
94+
}
95+
96+
private static CoralDataType convertList(ListTypeInfo listType) {
97+
CoralDataType elementType = convert(listType.getListElementTypeInfo());
98+
return ArrayType.of(elementType, true); // Lists are nullable in Hive
99+
}
100+
101+
private static CoralDataType convertMap(MapTypeInfo mapType) {
102+
CoralDataType keyType = convert(mapType.getMapKeyTypeInfo());
103+
CoralDataType valueType = convert(mapType.getMapValueTypeInfo());
104+
return MapType.of(keyType, valueType, true); // Maps are nullable in Hive
105+
}
106+
107+
private static CoralDataType convertStruct(StructTypeInfo structType) {
108+
List<String> fieldNames = structType.getAllStructFieldNames();
109+
List<TypeInfo> fieldTypeInfos = structType.getAllStructFieldTypeInfos();
110+
111+
List<StructField> fields = new ArrayList<>();
112+
for (int i = 0; i < fieldTypeInfos.size(); i++) {
113+
CoralDataType fieldType = convert(fieldTypeInfos.get(i));
114+
fields.add(StructField.of(fieldNames.get(i), fieldType));
115+
}
116+
117+
return StructType.of(fields, true); // Structs are nullable in Hive
118+
}
119+
120+
private static CoralDataType convertUnion(UnionTypeInfo unionType) {
121+
// For UNION types, we'll create a struct with all possible fields
122+
// This is similar to how some systems handle union types
123+
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();
124+
125+
// Create fields for each possible type in the union
126+
List<StructField> fields = new ArrayList<>();
127+
for (int i = 0; i < memberTypes.size(); i++) {
128+
CoralDataType fieldType = convert(memberTypes.get(i));
129+
fields.add(StructField.of("field" + i, fieldType));
130+
}
131+
132+
return StructType.of(fields, true);
133+
}
134+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common.types;
7+
8+
import java.util.Objects;
9+
10+
11+
/**
12+
* Represents an array data type in the Coral type system.
13+
*/
14+
public final class ArrayType implements CoralDataType {
15+
private final CoralDataType elementType;
16+
private final boolean nullable;
17+
18+
/**
19+
* Creates a new array type.
20+
* @param elementType the type of elements in the array
21+
* @param nullable whether this type allows null values
22+
*/
23+
public static ArrayType of(CoralDataType elementType, boolean nullable) {
24+
return new ArrayType(elementType, nullable);
25+
}
26+
27+
private ArrayType(CoralDataType elementType, boolean nullable) {
28+
this.elementType = Objects.requireNonNull(elementType, "Element type cannot be null");
29+
this.nullable = nullable;
30+
}
31+
32+
/**
33+
* Returns the type of elements in this array.
34+
* @return the element type
35+
*/
36+
public CoralDataType getElementType() {
37+
return elementType;
38+
}
39+
40+
@Override
41+
public CoralTypeKind getKind() {
42+
return CoralTypeKind.ARRAY;
43+
}
44+
45+
@Override
46+
public boolean isNullable() {
47+
return nullable;
48+
}
49+
50+
@Override
51+
public boolean equals(Object o) {
52+
if (this == o)
53+
return true;
54+
if (o == null || getClass() != o.getClass())
55+
return false;
56+
ArrayType that = (ArrayType) o;
57+
return nullable == that.nullable && Objects.equals(elementType, that.elementType);
58+
}
59+
60+
@Override
61+
public int hashCode() {
62+
return Objects.hash(elementType, nullable);
63+
}
64+
65+
@Override
66+
public String toString() {
67+
return "ARRAY<" + elementType + ">" + (nullable ? " NULL" : " NOT NULL");
68+
}
69+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common.types;
7+
8+
import java.util.Objects;
9+
10+
11+
/**
12+
* Represents a fixed-length character data type in the Coral type system.
13+
*/
14+
public final class CharType implements CoralDataType {
15+
private final int length;
16+
private final boolean nullable;
17+
18+
/**
19+
* Creates a new CHAR type.
20+
* @param length the fixed length of the character string
21+
* @param nullable whether this type allows null values
22+
*/
23+
public static CharType of(int length, boolean nullable) {
24+
if (length <= 0) {
25+
throw new IllegalArgumentException("Length must be positive, got: " + length);
26+
}
27+
return new CharType(length, nullable);
28+
}
29+
30+
private CharType(int length, boolean nullable) {
31+
this.length = length;
32+
this.nullable = nullable;
33+
}
34+
35+
/**
36+
* Returns the fixed length of this CHAR type.
37+
* @return the length
38+
*/
39+
public int getLength() {
40+
return length;
41+
}
42+
43+
@Override
44+
public CoralTypeKind getKind() {
45+
return CoralTypeKind.CHAR;
46+
}
47+
48+
@Override
49+
public boolean isNullable() {
50+
return nullable;
51+
}
52+
53+
@Override
54+
public boolean equals(Object o) {
55+
if (this == o)
56+
return true;
57+
if (o == null || getClass() != o.getClass())
58+
return false;
59+
CharType that = (CharType) o;
60+
return length == that.length && nullable == that.nullable;
61+
}
62+
63+
@Override
64+
public int hashCode() {
65+
return Objects.hash(length, nullable);
66+
}
67+
68+
@Override
69+
public String toString() {
70+
return "CHAR(" + length + ")" + (nullable ? " NULL" : " NOT NULL");
71+
}
72+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common.types;
7+
8+
/**
9+
* Represents a data type in the Coral type system.
10+
* This interface provides a planner-agnostic abstraction for data types
11+
* that can be converted to various execution engine specific types.
12+
*/
13+
public interface CoralDataType {
14+
/**
15+
* Returns the kind of this data type.
16+
* @return the type kind
17+
*/
18+
CoralTypeKind getKind();
19+
20+
/**
21+
* Returns whether this data type allows null values.
22+
* @return true if nullable, false otherwise
23+
*/
24+
boolean isNullable();
25+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.common.types;
7+
8+
/**
 * All data type kinds supported by the Coral type system.
 * The set covers primitive and complex types that can be mapped to various execution engines.
 */
public enum CoralTypeKind {
  // Boolean and numeric types
  BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DECIMAL,

  // String and character types
  CHAR, VARCHAR, STRING,

  // Date and time types
  DATE, TIME, TIMESTAMP,

  // Binary types
  BINARY,

  // Complex types
  ARRAY, MAP, STRUCT
}

0 commit comments

Comments
 (0)