Skip to content

Commit c51d3b5

Browse files
KevinGe00 and rzhang10 authored
Register "passthrough" UDFs with correct ordinal return type (#541)
* register spark groot udf with correct return type
* test showcase
* extend + refactor unit test

Co-authored-by: Limian Zhang <[email protected]>
1 parent b507761 commit c51d3b5

File tree

6 files changed

+177
-3
lines changed

6 files changed

+177
-3
lines changed

coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -675,10 +675,12 @@ public boolean isOptional(int i) {
675675

676676
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.HasMemberConsentUDF", ReturnTypes.BOOLEAN,
677677
family(SqlTypeFamily.STRING, SqlTypeFamily.ANY, SqlTypeFamily.TIMESTAMP));
678-
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactFieldIfUDF", ARG1,
678+
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactFieldIfUDF",
679+
new OrdinalReturnTypeInferenceV2(1),
679680
family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.STRING, SqlTypeFamily.ANY));
680-
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactSecondarySchemaFieldIfUDF", ARG1, family(
681-
SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY, SqlTypeFamily.STRING, SqlTypeFamily.STRING));
681+
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.RedactSecondarySchemaFieldIfUDF",
682+
new OrdinalReturnTypeInferenceV2(1), family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ARRAY,
683+
SqlTypeFamily.STRING, SqlTypeFamily.STRING));
682684
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.GetMappedValueUDF", FunctionReturnTypes.STRING,
683685
family(SqlTypeFamily.STRING, SqlTypeFamily.STRING));
684686
createAddUserDefinedFunction("com.linkedin.groot.runtime.udf.spark.ExtractCollectionUDF",
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/**
2+
* Copyright 2021-2024 LinkedIn Corporation. All rights reserved.
3+
* Licensed under the BSD-2 Clause license.
4+
* See LICENSE in the project root for license information.
5+
*/
6+
package com.linkedin.coral.hive.hive2rel;
7+
8+
import org.apache.hadoop.hive.ql.exec.Description;
9+
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
10+
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
11+
import org.apache.hadoop.hive.ql.metadata.HiveException;
12+
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
13+
import org.apache.hadoop.hive.serde2.objectinspector.*;
14+
15+
16+
@Description(name = "return_second_arg_struct_udf",
17+
value = "_FUNC_(string, struct) - Returns the second argument (struct) as-is")
18+
public class CoralTestUDFReturnSecondArg extends GenericUDF {
19+
20+
private transient ObjectInspector stringOI;
21+
private transient StructObjectInspector structOI;
22+
23+
@Override
24+
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
25+
// Check the number of arguments
26+
if (arguments.length != 2) {
27+
throw new UDFArgumentLengthException(
28+
"return_struct_udf() requires exactly two arguments: a string and a struct.");
29+
}
30+
31+
// Validate the first argument (string)
32+
if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE || ((PrimitiveObjectInspector) arguments[0])
33+
.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
34+
throw new UDFArgumentException("The first argument must be a string.");
35+
}
36+
37+
// Validate the second argument (struct)
38+
if (arguments[1].getCategory() != ObjectInspector.Category.STRUCT) {
39+
throw new UDFArgumentException("The second argument must be a struct.");
40+
}
41+
42+
// Initialize ObjectInspectors
43+
stringOI = arguments[0];
44+
structOI = (StructObjectInspector) arguments[1];
45+
46+
// Return the ObjectInspector for the struct (second argument)
47+
return structOI;
48+
}
49+
50+
@Override
51+
public Object evaluate(DeferredObject[] arguments) throws HiveException {
52+
// Simply return the second argument as-is
53+
Object structObj = arguments[1].get();
54+
return structObj;
55+
}
56+
57+
@Override
58+
public String getDisplayString(String[] children) {
59+
return "return_struct_udf(" + children[0] + ", " + children[1] + ")";
60+
}
61+
}

coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import com.linkedin.coral.common.HiveMetastoreClient;
3333
import com.linkedin.coral.common.HiveMscAdapter;
3434
import com.linkedin.coral.common.functions.FunctionReturnTypes;
35+
import com.linkedin.coral.hive.hive2rel.functions.OrdinalReturnTypeInferenceV2;
3536
import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry;
3637

3738
import static org.apache.calcite.sql.type.OperandTypes.*;
@@ -81,6 +82,9 @@ public static void registerUdfs() {
8182
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnStruct", FunctionReturnTypes
8283
.rowOf(ImmutableList.of("isEven", "number"), ImmutableList.of(SqlTypeName.BOOLEAN, SqlTypeName.INTEGER)),
8384
family(SqlTypeFamily.INTEGER));
85+
StaticHiveFunctionRegistry.createAddUserDefinedFunction(
86+
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg", new OrdinalReturnTypeInferenceV2(1),
87+
family(SqlTypeFamily.STRING, SqlTypeFamily.ANY));
8488
}
8589

8690
private static void initializeTables() {
@@ -104,6 +108,7 @@ private static void initializeTables() {
104108
String baseComplexNullableWithDefaults = loadSchema("base-complex-nullable-with-defaults.avsc");
105109
String basePrimitive = loadSchema("base-primitive.avsc");
106110
String baseComplexNestedStructSameName = loadSchema("base-complex-nested-struct-same-name.avsc");
111+
String baseComplexMixedNullabilities = loadSchema("base-complex-mixed-nullabilities.avsc");
107112

108113
executeCreateTableQuery("default", "basecomplex", baseComplexSchema);
109114
executeCreateTableQuery("default", "basecomplexunioncompatible", baseComplexUnionCompatible);
@@ -125,6 +130,7 @@ private static void initializeTables() {
125130
executeCreateTableWithPartitionQuery("default", "basenestedcomplex", baseNestedComplexSchema);
126131
executeCreateTableWithPartitionQuery("default", "basecomplexnullablewithdefaults", baseComplexNullableWithDefaults);
127132
executeCreateTableWithPartitionQuery("default", "basecomplexnonnullable", baseComplexNonNullable);
133+
executeCreateTableWithPartitionQuery("default", "basecomplexmixednullabilities", baseComplexMixedNullabilities);
128134

129135
String baseComplexSchemaWithDoc = loadSchema("docTestResources/base-complex-with-doc.avsc");
130136
String baseEnumSchemaWithDoc = loadSchema("docTestResources/base-enum-with-doc.avsc");
@@ -170,6 +176,9 @@ private static void initializeUdfs() {
170176

171177
executeCreateFunctionQuery("default", Collections.singletonList("foo_udf_return_struct"), "FuncIsEven",
172178
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnStruct");
179+
180+
executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStuct",
181+
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg");
173182
}
174183

175184
private static void executeCreateTableQuery(String dbName, String tableName, String schema) {

coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,25 @@ public void testUdfLessThanHundred() {
233233
Assert.assertEquals(actualSchema.toString(true), TestUtils.loadSchema("testUdfLessThanHundred-expected.avsc"));
234234
}
235235

236+
@Test
237+
public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() {
238+
String viewSql = "CREATE VIEW innerfield_with_udf "
239+
+ "tblproperties('functions' = 'ReturnInnerStuct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
240+
+ " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS "
241+
+ "SELECT default_innerfield_with_udf_ReturnInnerStuct('foo', innerRecord) AS innerRecord "
242+
+ "FROM basecomplexmixednullabilities";
243+
244+
TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql);
245+
246+
ViewToAvroSchemaConverter viewToAvroSchemaConverter = ViewToAvroSchemaConverter.create(hiveMetastoreClient);
247+
Schema actualSchema = viewToAvroSchemaConverter.toAvroSchema("default", "innerfield_with_udf");
248+
249+
// Expect all fields to retain their nullability after applying the UDF, CoralTestUDFReturnSecondArg, that simply
250+
// returns the second argument as is
251+
Assert.assertEquals(actualSchema.toString(true),
252+
TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc"));
253+
}
254+
236255
@Test
237256
public void testUdfGreaterThanHundred() {
238257
String viewSql = "CREATE VIEW foo_dali_udf2 "
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"type": "record",
3+
"name": "OuterRecord",
4+
"fields": [
5+
{
6+
"name": "innerRecord",
7+
"type": {
8+
"type": "record",
9+
"name": "InnerRecord",
10+
"fields": [
11+
{
12+
"name": "String_Field_Non_Nullable",
13+
"type": "string"
14+
},
15+
{
16+
"name": "String_Field_Nullable",
17+
"type": [ "string", "null" ]
18+
},
19+
{
20+
"name" : "Int_Field_Non_Nullable",
21+
"type" : "int"
22+
},
23+
{
24+
"name" : "Int_Field_Nullable",
25+
"type" : [ "int", "null" ]
26+
},
27+
{
28+
"name" : "Array_Col_Non_Nullable",
29+
"type" : {
30+
"type" : "array",
31+
"items" : "string"
32+
}
33+
},
34+
{
35+
"name" : "Array_Col_Nullable",
36+
"type" : [ "null", {
37+
"type" : "array",
38+
"items" : [ "null", "string" ]
39+
} ]
40+
}
41+
]
42+
}
43+
}
44+
]
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"type" : "record",
3+
"name" : "innerfield_with_udf",
4+
"namespace" : "default.innerfield_with_udf",
5+
"fields" : [ {
6+
"name" : "innerRecord",
7+
"type" : {
8+
"type" : "record",
9+
"name" : "InnerRecord",
10+
"namespace" : "default.innerfield_with_udf.innerfield_with_udf",
11+
"fields" : [ {
12+
"name" : "String_Field_Non_Nullable",
13+
"type" : "string"
14+
}, {
15+
"name" : "String_Field_Nullable",
16+
"type" : [ "string", "null" ]
17+
}, {
18+
"name" : "Int_Field_Non_Nullable",
19+
"type" : "int"
20+
}, {
21+
"name" : "Int_Field_Nullable",
22+
"type" : [ "int", "null" ]
23+
}, {
24+
"name" : "Array_Col_Non_Nullable",
25+
"type" : {
26+
"type" : "array",
27+
"items" : "string"
28+
}
29+
}, {
30+
"name" : "Array_Col_Nullable",
31+
"type" : [ "null", {
32+
"type" : "array",
33+
"items" : [ "null", "string" ]
34+
} ]
35+
} ]
36+
}
37+
} ]
38+
}

0 commit comments

Comments
 (0)