Skip to content

Commit f6ce2be

Browse files
authored
[Coral-schema] Generalize operand schema inference on ordinal return type UDF calls (#548)
* fix nested nullability inference for nested udf calls
* spotless
* generalize operand schema inference for ordinal return type UDF calls
* add unit test + documentation
1 parent 15fc504 commit f6ce2be

File tree

3 files changed

+36
-19
lines changed

3 files changed

+36
-19
lines changed

coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -433,25 +433,20 @@ public RexNode visitLiteral(RexLiteral rexLiteral) {
433433
@Override
434434
public RexNode visitCall(RexCall rexCall) {
435435
/**
436-
* For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively
437-
* and only return type of udf or sql operator is relevant
438-
*/
439-
440-
/**
441-
* If the return type of RexCall is based on the ordinal of its input argument
442-
* and the corresponding input argument refers to a field from the input schema,
443-
* use the field's schema as is.
436+
* If the return type of RexCall is based on an ordinal of its input arguments, then leverage SchemaRexShuttle
437+
* to visit the input argument and use the argument's schema as is to infer the return type of the call
444438
*/
445439
if (rexCall.getOperator().getReturnTypeInference() instanceof OrdinalReturnTypeInferenceV2) {
446440
int index = ((OrdinalReturnTypeInferenceV2) rexCall.getOperator().getReturnTypeInference()).getOrdinal();
447441
RexNode operand = rexCall.operands.get(index);
448-
449-
if (operand instanceof RexInputRef) {
450-
appendRexInputRefField((RexInputRef) operand);
451-
return rexCall;
452-
}
442+
operand.accept(this);
443+
return rexCall;
453444
}
454445

446+
/**
447+
* For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively
448+
* and just directly use the return type of udf or sql operator as the field's schema
449+
*/
455450
RelDataType fieldType = rexCall.getType();
456451
boolean isNullable = SchemaUtilities.isFieldNullable(rexCall, inputSchema);
457452

coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -177,7 +177,7 @@ private static void initializeUdfs() {
177177
executeCreateFunctionQuery("default", Collections.singletonList("foo_udf_return_struct"), "FuncIsEven",
178178
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnStruct");
179179

180-
executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStuct",
180+
executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStruct",
181181
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg");
182182
}
183183

coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -236,9 +236,9 @@ public void testUdfLessThanHundred() {
236236
@Test
237237
public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() {
238238
String viewSql = "CREATE VIEW innerfield_with_udf "
239-
+ "tblproperties('functions' = 'ReturnInnerStuct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
239+
+ "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
240240
+ " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS "
241-
+ "SELECT default_innerfield_with_udf_ReturnInnerStuct('foo', innerRecord) AS innerRecord "
241+
+ "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord) AS innerRecord "
242242
+ "FROM basecomplexmixednullabilities";
243243

244244
TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql);
@@ -252,6 +252,28 @@ public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() {
252252
TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc"));
253253
}
254254

255+
@Test
256+
public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDFForNestedCalls() {
257+
String viewSql = "CREATE VIEW innerfield_with_udf "
258+
+ "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
259+
+ " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS "
260+
+ "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord)) AS innerRecord "
261+
+ "FROM basecomplexmixednullabilities";
262+
263+
TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql);
264+
265+
ViewToAvroSchemaConverter viewToAvroSchemaConverter = ViewToAvroSchemaConverter.create(hiveMetastoreClient);
266+
Schema actualSchema = viewToAvroSchemaConverter.toAvroSchema("default", "innerfield_with_udf");
267+
268+
// Inner ReturnInnerStruct call return type == Return type of its second argument, innerRecord
269+
// Outer ReturnInnerStruct call return type == Return type of its second argument, Inner ReturnInnerStruct call return type
270+
// Therefore, Outer ReturnInnerStruct call return type == Return type of innerRecord
271+
//
272+
// We also expect all fields to retain their nullability after applying the UDF calls
273+
Assert.assertEquals(actualSchema.toString(true),
274+
TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc"));
275+
}
276+
255277
@Test
256278
public void testUdfGreaterThanHundred() {
257279
String viewSql = "CREATE VIEW foo_dali_udf2 "

0 commit comments

Comments
 (0)