Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.*;

import com.linkedin.coral.common.types.*;


/**
* Converts Hive TypeInfo objects to Coral data types.
* This enables integration between Hive's type system and Coral's type system.
*/
public final class HiveToCoralTypeConverter {

private HiveToCoralTypeConverter() {
// Utility class - prevent instantiation
}

/**
* Converts a Hive TypeInfo to a Coral data type.
* @param typeInfo the Hive type to convert
* @return the corresponding Coral data type
*/
public static CoralDataType convert(TypeInfo typeInfo) {
if (typeInfo == null) {
throw new IllegalArgumentException("TypeInfo cannot be null");
}

switch (typeInfo.getCategory()) {
case PRIMITIVE:
return convertPrimitive((PrimitiveTypeInfo) typeInfo);
case LIST:
return convertList((ListTypeInfo) typeInfo);
case MAP:
return convertMap((MapTypeInfo) typeInfo);
case STRUCT:
return convertStruct((StructTypeInfo) typeInfo);
case UNION:
return convertUnion((UnionTypeInfo) typeInfo);
default:
throw new UnsupportedOperationException("Unsupported type category: " + typeInfo.getCategory());
}
}

private static CoralDataType convertPrimitive(PrimitiveTypeInfo type) {
boolean nullable = true; // Hive types are generally nullable

switch (type.getPrimitiveCategory()) {
case BOOLEAN:
return PrimitiveType.of(CoralTypeKind.BOOLEAN, nullable);
case BYTE:
return PrimitiveType.of(CoralTypeKind.TINYINT, nullable);
case SHORT:
return PrimitiveType.of(CoralTypeKind.SMALLINT, nullable);
case INT:
return PrimitiveType.of(CoralTypeKind.INT, nullable);
case LONG:
return PrimitiveType.of(CoralTypeKind.BIGINT, nullable);
case FLOAT:
return PrimitiveType.of(CoralTypeKind.FLOAT, nullable);
case DOUBLE:
return PrimitiveType.of(CoralTypeKind.DOUBLE, nullable);
case STRING:
return PrimitiveType.of(CoralTypeKind.STRING, nullable);
case DATE:
return PrimitiveType.of(CoralTypeKind.DATE, nullable);
case TIMESTAMP:
// Default to microsecond precision (6)
return TimestampType.of(3, nullable);
case BINARY:
return PrimitiveType.of(CoralTypeKind.BINARY, nullable);
case DECIMAL:
DecimalTypeInfo decimalType = (DecimalTypeInfo) type;
return DecimalType.of(decimalType.precision(), decimalType.scale(), nullable);
case VARCHAR:
VarcharTypeInfo varcharType = (VarcharTypeInfo) type;
return VarcharType.of(varcharType.getLength(), nullable);
case CHAR:
CharTypeInfo charType = (CharTypeInfo) type;
return CharType.of(charType.getLength(), nullable);
case VOID:
case UNKNOWN:
return PrimitiveType.of(CoralTypeKind.STRING, true); // Map to nullable string as a fallback
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor nit to use nullable field here instead of hardcoding 'true', but not a blocker.

default:
throw new UnsupportedOperationException("Unsupported primitive type: " + type.getPrimitiveCategory());
}
}

private static CoralDataType convertList(ListTypeInfo listType) {
CoralDataType elementType = convert(listType.getListElementTypeInfo());
return ArrayType.of(elementType, true); // Lists are nullable in Hive
}

private static CoralDataType convertMap(MapTypeInfo mapType) {
CoralDataType keyType = convert(mapType.getMapKeyTypeInfo());
CoralDataType valueType = convert(mapType.getMapValueTypeInfo());
return MapType.of(keyType, valueType, true); // Maps are nullable in Hive
}

private static CoralDataType convertStruct(StructTypeInfo structType) {
List<String> fieldNames = structType.getAllStructFieldNames();
List<TypeInfo> fieldTypeInfos = structType.getAllStructFieldTypeInfos();

List<StructField> fields = new ArrayList<>();
for (int i = 0; i < fieldTypeInfos.size(); i++) {
CoralDataType fieldType = convert(fieldTypeInfos.get(i));
fields.add(StructField.of(fieldNames.get(i), fieldType));
}

return StructType.of(fields, true); // Structs are nullable in Hive
}

private static CoralDataType convertUnion(UnionTypeInfo unionType) {
// For UNION types, we'll create a struct with all possible fields
// This is similar to how some systems handle union types
List<TypeInfo> memberTypes = unionType.getAllUnionObjectTypeInfos();

// Create fields for each possible type in the union
List<StructField> fields = new ArrayList<>();
for (int i = 0; i < memberTypes.size(); i++) {
CoralDataType fieldType = convert(memberTypes.get(i));
fields.add(StructField.of("field" + i, fieldType));
}

return StructType.of(fields, true);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/**
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common.types;

import java.util.Objects;


/**
 * Coral type describing an array whose elements all share one element type.
 * Instances are immutable and are obtained through {@link #of(CoralDataType, boolean)}.
 */
public final class ArrayType implements CoralDataType {
  private final CoralDataType elementType;
  private final boolean nullable;

  private ArrayType(CoralDataType elementType, boolean nullable) {
    this.elementType = Objects.requireNonNull(elementType, "Element type cannot be null");
    this.nullable = nullable;
  }

  /**
   * Factory for array types.
   * @param elementType the type of elements in the array; must not be null
   * @param nullable whether this type allows null values
   * @return a new array type
   * @throws NullPointerException if {@code elementType} is null
   */
  public static ArrayType of(CoralDataType elementType, boolean nullable) {
    return new ArrayType(elementType, nullable);
  }

  /**
   * Returns the element type this array was created with.
   * @return the element type, never null
   */
  public CoralDataType getElementType() {
    return this.elementType;
  }

  @Override
  public CoralTypeKind getKind() {
    return CoralTypeKind.ARRAY;
  }

  @Override
  public boolean isNullable() {
    return this.nullable;
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    // The class is final, so instanceof matches exactly the same objects as a getClass() comparison
    if (!(o instanceof ArrayType)) {
      return false;
    }
    ArrayType other = (ArrayType) o;
    if (this.nullable != other.nullable) {
      return false;
    }
    return Objects.equals(this.elementType, other.elementType);
  }

  @Override
  public int hashCode() {
    return Objects.hash(elementType, nullable);
  }

  @Override
  public String toString() {
    String nullability = nullable ? " NULL" : " NOT NULL";
    return "ARRAY<" + elementType + ">" + nullability;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/**
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common.types;

import java.util.Objects;


/**
 * Coral type describing a fixed-length character string.
 * Instances are immutable and are obtained through {@link #of(int, boolean)}.
 */
public final class CharType implements CoralDataType {
  private final int length;
  private final boolean nullable;

  private CharType(int length, boolean nullable) {
    this.length = length;
    this.nullable = nullable;
  }

  /**
   * Factory for CHAR types.
   * @param length the fixed length of the character string; must be at least 1
   * @param nullable whether this type allows null values
   * @return a new CHAR type
   * @throws IllegalArgumentException if {@code length} is zero or negative
   */
  public static CharType of(int length, boolean nullable) {
    if (length < 1) {
      throw new IllegalArgumentException("Length must be positive, got: " + length);
    }
    return new CharType(length, nullable);
  }

  /**
   * Returns the fixed length this CHAR type was created with.
   * @return the length
   */
  public int getLength() {
    return this.length;
  }

  @Override
  public CoralTypeKind getKind() {
    return CoralTypeKind.CHAR;
  }

  @Override
  public boolean isNullable() {
    return this.nullable;
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    // The class is final, so instanceof matches exactly the same objects as a getClass() comparison
    if (!(o instanceof CharType)) {
      return false;
    }
    CharType other = (CharType) o;
    if (this.length != other.length) {
      return false;
    }
    return this.nullable == other.nullable;
  }

  @Override
  public int hashCode() {
    return Objects.hash(length, nullable);
  }

  @Override
  public String toString() {
    String nullability = nullable ? " NULL" : " NOT NULL";
    return "CHAR(" + length + ")" + nullability;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common.types;

/**
 * Represents a data type in the Coral type system.
 * This interface provides a planner-agnostic abstraction for data types
 * that can be converted to various execution engine specific types.
 *
 * <p>Every implementation exposes two properties: the kind of the type
 * (see {@link CoralTypeKind}) and whether values of the type may be null.
 */
public interface CoralDataType {
  /**
   * Returns the kind of this data type.
   * @return the type kind, one of the {@link CoralTypeKind} constants
   */
  CoralTypeKind getKind();

  /**
   * Returns whether this data type allows null values.
   * @return true if nullable, false otherwise
   */
  boolean isNullable();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright 2024-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common.types;

/**
 * Enumeration of all supported data type kinds in the Coral type system.
 * This provides a comprehensive set of primitive and complex types that
 * can be mapped to various execution engines.
 *
 * <p>No NULL / OTHER kind is defined. Review discussion on this enum raised adding them
 * for backward compatibility with the existing TypeConverter and as a default mapping
 * for unknown types; they were left out for now and can be added if a use case arises.
 */
public enum CoralTypeKind {
  // Boolean type
  BOOLEAN,

  // Numeric types
  TINYINT,
  SMALLINT,
  INT,
  BIGINT,
  FLOAT,
  DOUBLE,
  DECIMAL,

  // String and character types
  CHAR,
  VARCHAR,
  STRING,

  // Date and time types
  DATE,
  TIME,
  TIMESTAMP,

  // Binary types
  BINARY,

  // Complex types
  ARRAY,
  MAP,
  STRUCT
}
Loading