Skip to content

Commit 633474f

Browse files
aastha25 and wmoustafa authored
[Coral-Hive] [Coral-Trino] Make named_struct a Coral IR operator and Migrate GenericProject Function (#431)
* Initial commit for genericProject Migration
* remaining genericProject and some from namedStruct
* initial commit for timestamp op migrations
* rename SqlShuttle class
* enable test and rename var
* initial commit for namedstruct from PR#412
* rename transformer and add UT
* build fix
* fix for nested named_struct()
* add documentation
---------
Co-authored-by: Walaa Eldin Moustafa <[email protected]>
1 parent e808370 commit 633474f

File tree

11 files changed

+191
-133
lines changed

11 files changed

+191
-133
lines changed

coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveConvertletTable.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
/**
2-
* Copyright 2018-2022 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
66
package com.linkedin.coral.hive.hive2rel;
77

8-
import java.util.ArrayList;
98
import java.util.List;
109

1110
import com.google.common.base.Preconditions;
@@ -17,7 +16,6 @@
1716
import org.apache.calcite.sql.SqlNode;
1817
import org.apache.calcite.sql.SqlNodeList;
1918
import org.apache.calcite.sql.fun.SqlCastFunction;
20-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2119
import org.apache.calcite.sql2rel.ReflectiveConvertletTable;
2220
import org.apache.calcite.sql2rel.SqlRexContext;
2321
import org.apache.calcite.sql2rel.SqlRexConvertlet;
@@ -26,7 +24,6 @@
2624
import com.linkedin.coral.com.google.common.collect.ImmutableList;
2725
import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator;
2826
import com.linkedin.coral.hive.hive2rel.functions.HiveInOperator;
29-
import com.linkedin.coral.hive.hive2rel.functions.HiveNamedStructFunction;
3027

3128

3229
/**
@@ -35,17 +32,6 @@
3532
*/
3633
public class HiveConvertletTable extends ReflectiveConvertletTable {
3734

38-
@SuppressWarnings("unused")
39-
public RexNode convertNamedStruct(SqlRexContext cx, HiveNamedStructFunction func, SqlCall call) {
40-
List<RexNode> operandExpressions = new ArrayList<>(call.operandCount() / 2);
41-
for (int i = 0; i < call.operandCount(); i += 2) {
42-
operandExpressions.add(cx.convertExpression(call.operand(i + 1)));
43-
}
44-
RelDataType retType = cx.getValidator().getValidatedNodeType(call);
45-
RexNode rowNode = cx.getRexBuilder().makeCall(retType, SqlStdOperatorTable.ROW, operandExpressions);
46-
return cx.getRexBuilder().makeCast(retType, rowNode);
47-
}
48-
4935
@SuppressWarnings("unused")
5036
public RexNode convertHiveInOperator(SqlRexContext cx, HiveInOperator operator, SqlCall call) {
5137
List<SqlNode> operandList = call.getOperandList();

coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2017-2022 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -497,12 +497,11 @@ public void testStructPeekDisallowed() {
497497
public void testStructReturnFieldAccess() {
498498
final String sql = "select named_struct('field_a', 10, 'field_b', 'abc').field_b";
499499
RelNode rel = toRel(sql);
500-
final String expectedRel = "LogicalProject(EXPR$0=[CAST(ROW(10, 'abc')):"
501-
+ "RecordType(INTEGER NOT NULL field_a, CHAR(3) NOT NULL field_b) NOT NULL.field_b])\n"
500+
final String expectedRel = "LogicalProject(EXPR$0=[named_struct('field_a', 10, 'field_b', 'abc').field_b])\n"
502501
+ " LogicalValues(tuples=[[{ 0 }]])\n";
503502
assertEquals(relToStr(rel), expectedRel);
504-
final String expectedSql = "SELECT CAST(ROW(10, 'abc') AS ROW(field_a INTEGER, field_b CHAR(3))).field_b\n"
505-
+ "FROM (VALUES (0)) t (ZERO)";
503+
final String expectedSql =
504+
"SELECT named_struct('field_a', 10, 'field_b', 'abc').field_b\n" + "FROM (VALUES (0)) t (ZERO)";
506505
assertEquals(relToHql(rel), expectedSql);
507506
}
508507

coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/NamedStructTest.java

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2018-2022 LinkedIn Corporation. All rights reserved.
2+
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
33
* Licensed under the BSD-2 Clause license.
44
* See LICENSE in the project root for license information.
55
*/
@@ -44,8 +44,7 @@ public void testMixedTypes() {
4444
final String sql = "SELECT named_struct('abc', 123, 'def', 'xyz')";
4545
RelNode rel = toRel(sql);
4646
final String generated = relToStr(rel);
47-
final String expected = ""
48-
+ "LogicalProject(EXPR$0=[CAST(ROW(123, 'xyz')):RecordType(INTEGER NOT NULL abc, CHAR(3) NOT NULL def) NOT NULL])\n"
47+
final String expected = "" + "LogicalProject(EXPR$0=[named_struct('abc', 123, 'def', 'xyz')])\n"
4948
+ " LogicalValues(tuples=[[{ 0 }]])\n";
5049
assertEquals(generated, expected);
5150
}
@@ -54,9 +53,8 @@ public void testMixedTypes() {
5453
public void testNullFieldValue() {
5554
final String sql = "SELECT named_struct('abc', cast(NULL as int), 'def', 150)";
5655
final String generated = sqlToRelStr(sql);
57-
final String expected =
58-
"LogicalProject(EXPR$0=[CAST(ROW(CAST(null:NULL):INTEGER, 150)):RecordType(INTEGER abc, INTEGER NOT NULL def) NOT NULL])\n"
59-
+ " LogicalValues(tuples=[[{ 0 }]])\n";
56+
final String expected = "LogicalProject(EXPR$0=[named_struct('abc', CAST(null:NULL):INTEGER, 'def', 150)])\n"
57+
+ " LogicalValues(tuples=[[{ 0 }]])\n";
6058
assertEquals(generated, expected);
6159
}
6260

@@ -65,7 +63,7 @@ public void testAllNullValues() {
6563
final String sql = "SELECT named_struct('abc', cast(NULL as int), 'def', cast(NULL as double))";
6664
final String generated = sqlToRelStr(sql);
6765
final String expected =
68-
"LogicalProject(EXPR$0=[CAST(ROW(CAST(null:NULL):INTEGER, CAST(null:NULL):DOUBLE)):RecordType(INTEGER abc, DOUBLE def) NOT NULL])\n"
66+
"LogicalProject(EXPR$0=[named_struct('abc', CAST(null:NULL):INTEGER, 'def', CAST(null:NULL):DOUBLE)])\n"
6967
+ " LogicalValues(tuples=[[{ 0 }]])\n";
7068
assertEquals(generated, expected);
7169
}
@@ -74,10 +72,9 @@ public void testAllNullValues() {
7472
public void testNestedComplexTypes() {
7573
final String sql = "SELECT named_struct('arr', array(10, 15), 's', named_struct('f1', 123, 'f2', array(20.5)))";
7674
final String generated = sqlToRelStr(sql);
77-
final String expected = "LogicalProject(EXPR$0=[CAST(ROW(ARRAY(10, 15), CAST(ROW(123, ARRAY(20.5:DECIMAL(3, 1)))):"
78-
+ "RecordType(INTEGER NOT NULL f1, DECIMAL(3, 1) NOT NULL ARRAY NOT NULL f2) NOT NULL)):"
79-
+ "RecordType(INTEGER NOT NULL ARRAY NOT NULL arr, RecordType(INTEGER NOT NULL f1, DECIMAL(3, 1) NOT NULL ARRAY NOT NULL f2) NOT NULL s) NOT NULL])\n"
80-
+ " LogicalValues(tuples=[[{ 0 }]])\n";
75+
final String expected =
76+
"LogicalProject(EXPR$0=[named_struct('arr', ARRAY(10, 15), 's', named_struct('f1', 123, 'f2', ARRAY(20.5:DECIMAL(3, 1))))])\n"
77+
+ " LogicalValues(tuples=[[{ 0 }]])\n";
8178
// verified by human that expected string is correct and retained here to protect from future changes
8279
assertEquals(generated, expected);
8380
}

coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@
4040
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
4141

4242
import com.linkedin.coral.com.google.common.collect.ImmutableList;
43-
import com.linkedin.coral.common.functions.GenericProjectFunction;
44-
import com.linkedin.coral.trino.rel2trino.functions.GenericProjectToTrinoConverter;
4543

4644
import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
4745
import static org.apache.calcite.sql.type.ReturnTypes.explicit;
@@ -160,14 +158,6 @@ public TrinoRexConverter(RelNode node, Map<String, Boolean> configs) {
160158

161159
@Override
162160
public RexNode visitCall(RexCall call) {
163-
// GenericProject requires a nontrivial function rewrite because of the following:
164-
// - makes use of Trino built-in UDFs transform_values for map objects and transform for array objects
165-
// which has lambda functions as parameters
166-
// - syntax is difficult for Calcite to parse
167-
// - the return type varies based on a desired schema to be projected
168-
if (call.getOperator() instanceof GenericProjectFunction) {
169-
return GenericProjectToTrinoConverter.convertGenericProject(rexBuilder, call, node);
170-
}
171161

172162
final String operatorName = call.getOperator().getName();
173163

coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/DataTypeDerivedSqlCallConverter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import com.linkedin.coral.common.utils.TypeDerivationUtil;
1616
import com.linkedin.coral.hive.hive2rel.HiveToRelConverter;
1717
import com.linkedin.coral.trino.rel2trino.transformers.FromUtcTimestampOperatorTransformer;
18+
import com.linkedin.coral.trino.rel2trino.transformers.GenericProjectTransformer;
19+
import com.linkedin.coral.trino.rel2trino.transformers.NamedStructToCastTransformer;
1820

1921

2022
/**
@@ -31,7 +33,8 @@ public class DataTypeDerivedSqlCallConverter extends SqlShuttle {
3133
public DataTypeDerivedSqlCallConverter(HiveMetastoreClient mscClient, SqlNode topSqlNode) {
3234
SqlValidator sqlValidator = new HiveToRelConverter(mscClient).getSqlValidator();
3335
TypeDerivationUtil typeDerivationUtil = new TypeDerivationUtil(sqlValidator, topSqlNode);
34-
operatorTransformerList = SqlCallTransformers.of(new FromUtcTimestampOperatorTransformer(typeDerivationUtil));
36+
operatorTransformerList = SqlCallTransformers.of(new FromUtcTimestampOperatorTransformer(typeDerivationUtil),
37+
new GenericProjectTransformer(typeDerivationUtil), new NamedStructToCastTransformer(typeDerivationUtil));
3538
}
3639

3740
@Override

coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/functions/RelDataTypeToTrinoTypeStringConverter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
* If a column, colA, has a RelDataType, relDataTypeA, with a Trino type string, trinoTypeStringA = buildStructDataTypeString(relDataTypeA),
2222
* then the following operation is syntactically and semantically correct in Trino: CAST(colA as trinoTypeStringA)
2323
*/
24-
class RelDataTypeToTrinoTypeStringConverter {
24+
public class RelDataTypeToTrinoTypeStringConverter {
2525
private RelDataTypeToTrinoTypeStringConverter() {
2626
}
2727

coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/functions/TrinoMapTransformValuesFunction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* Instead, we represent the input to this UDF as a string, and its return type is passed as a parameter
2020
* on creation.
2121
*/
22-
class TrinoMapTransformValuesFunction extends GenericTemplateFunction {
22+
public class TrinoMapTransformValuesFunction extends GenericTemplateFunction {
2323
public TrinoMapTransformValuesFunction(RelDataType transformValuesDataType) {
2424
super(transformValuesDataType, "transform_values");
2525
}

coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/functions/TrinoStructCastRowFunction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* Instead, we represent the input to this UDF as a string, and its return type is passed as a parameter
2020
* on creation.
2121
*/
22-
class TrinoStructCastRowFunction extends GenericTemplateFunction {
22+
public class TrinoStructCastRowFunction extends GenericTemplateFunction {
2323
public TrinoStructCastRowFunction(RelDataType structDataType) {
2424
super(structDataType, "cast");
2525
}

0 commit comments

Comments
 (0)