diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java index 2de157b0d..4ce175dc9 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java @@ -23,14 +23,17 @@ import com.linkedin.coral.hive.hive2rel.functions.HiveRLikeOperator; import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry; import com.linkedin.coral.trino.rel2trino.functions.TrinoElementAtFunction; +import com.linkedin.coral.trino.rel2trino.transformers.AsOperatorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.CollectListOrSetFunctionTransformer; import com.linkedin.coral.trino.rel2trino.transformers.CoralRegistryOperatorRenameSqlCallTransformer; import com.linkedin.coral.trino.rel2trino.transformers.CurrentTimestampTransformer; import com.linkedin.coral.trino.rel2trino.transformers.GenericCoralRegistryOperatorRenameSqlCallTransformer; +import com.linkedin.coral.trino.rel2trino.transformers.JoinSqlCallTransformer; import com.linkedin.coral.trino.rel2trino.transformers.MapValueConstructorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.ReturnTypeAdjustmentTransformer; import com.linkedin.coral.trino.rel2trino.transformers.SqlSelectAliasAppenderTransformer; import com.linkedin.coral.trino.rel2trino.transformers.ToDateOperatorTransformer; +import com.linkedin.coral.trino.rel2trino.transformers.UnnestOperatorTransformer; import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*; @@ -120,7 +123,8 @@ protected SqlCall transform(SqlCall sqlCall) { "com.linkedin.stdudfs.hive.daliudfs.UrnExtractorFunctionWrapper", 1, "urn_extractor"), new GenericCoralRegistryOperatorRenameSqlCallTransformer(), - new ReturnTypeAdjustmentTransformer(configs)); + 
new ReturnTypeAdjustmentTransformer(configs), new UnnestOperatorTransformer(), new AsOperatorTransformer(), + new JoinSqlCallTransformer()); } private SqlOperator hiveToCoralSqlOperator(String functionName) { diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java index e3b9351d6..80e4fd705 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java @@ -12,15 +12,13 @@ import java.util.List; import java.util.Map; -import com.google.common.collect.ImmutableMap; - +import org.apache.calcite.rel.BiRel; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.*; +import org.apache.calcite.rel.logical.LogicalTableFunctionScan; import org.apache.calcite.rel.rel2sql.RelToSqlConverter; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rel.type.RelDataTypeFieldImpl; -import org.apache.calcite.rel.type.RelRecordType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexLiteral; @@ -33,9 +31,8 @@ import com.linkedin.coral.com.google.common.collect.ImmutableList; import com.linkedin.coral.common.HiveMetastoreClient; +import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; -import com.linkedin.coral.hive.hive2rel.rel.HiveUncollect; -import com.linkedin.coral.trino.rel2trino.functions.TrinoArrayTransformFunction; import static com.google.common.base.Preconditions.*; import static com.linkedin.coral.trino.rel2trino.Calcite2TrinoUDFConverter.convertRel; @@ -151,72 +148,77 @@ public Result visit(Project e) { return builder.result(); } + /** + * Uncollect RelNode represents a table 
function that expands an array/map column into a relation. + * Super's implementation uses the conversion result of the child node as is and appends the function operator and an AS operator with two aliases. + * This generates a SqlNode like: + *
+   *     UNNEST (SELECT `complex`.`c` AS `col` FROM (VALUES  (0)) AS `t` (`ZERO`)) AS `t_alias` (`col_alias`)
+   * 
+ * + * However, the above result adds complexity to the overall transformations for the following reasons: + * 1. When expanding an array of type struct, the super's implementation generates individual columns for each data type inside the struct. + * 2. Super's SqlNode has an expendable SELECT clause inside the UNNEST operator due to the preexisting extraneous child LogicalProject RelNode. + * 3. Super's SqlNode does not mimic the original SqlNode constructed from an input SQL. + * + * Coral overrides Uncollect type RelNode with HiveUncollect to support returning a row set of a single column for operand type array[struct] + * as opposed to super's behavior which returns individual columns for each data type inside the struct. + * For the map operand, HiveUncollect's row type is same as the Uncollect's - a row set with two columns corresponding to (key, value). + * This overriding implementation also extracts the column(s) to expand from the conversion result of the child node instead of appending the nested SELECT clause as is + * and introduces the function operator as its parent. + * This override outputs a more easily parsable SqlNode that is consistent with the original SqlNode. + * + * @param e RelNode of type HiveUncollect as input. Example: + *
+   *             HiveUncollect
+   *                      \
+   *               LogicalProject(col=[$cor0.c])
+   *                        \
+   *                 LogicalValues(tuples=[[{ 0 }]])
+   *          
+ * + * @return Result of converting the RelNode to a SqlNode. + * The SqlNode generated by converting the above RelNode example is: + *
+   *                               (SqlBasicCall)
+   *                           UNNEST(`complex`.`c`)
+   *                                     |
+   *            _________________________|__________________________
+   *           |                                                   |
+   *  Operator: CoralSqlUnnestOperator                 Operand: `complex`.`c`
+   *
+   *        
+ */ + @Override public Result visit(Uncollect e) { - if (!isTrinoSupportedUnnest(e)) { - throw new UnsupportedOperationException("Trino does not allow unnest a result of a queries"); - } - // Remove SELECT in UNNEST(SELECT FROM (VALUES(0))) - // and generate UNNEST() AS () instead. - final Result x = visitChild(0, e.getInput()); - // Build + // projectResult's SqlNode representation: SELECT `complex`.`c` AS `col` FROM (VALUES (0)) AS `t` (`ZERO`) + final Result projectResult = visitChild(0, e.getInput()); + + // Extract column(s) to unnest from projectResult + // to generate simpler operand for UNNEST operator final List unnestOperands = new ArrayList<>(); + + RelDataType recordType = null; + boolean withOrdinality = e.withOrdinality; + for (RexNode unnestCol : ((Project) e.getInput()).getChildExps()) { - if (!configs.getOrDefault(SUPPORT_LEGACY_UNNEST_ARRAY_OF_STRUCT, false) && e instanceof HiveUncollect - && unnestCol.getType().getSqlTypeName().equals(SqlTypeName.ARRAY) + unnestOperands.add(projectResult.qualifiedContext().toSql(null, unnestCol)); + if (unnestCol.getType().getSqlTypeName().equals(SqlTypeName.ARRAY) && unnestCol.getType().getComponentType().getSqlTypeName().equals(SqlTypeName.ROW)) { - - // wrapper Record type with single column. - // It is needed as Trino follows SQL standard when unnesting - // ARRAY of ROWs, exposing each field in a ROW as separate column. This is not in-line with what - // Hive's LATERAL VIEW EXPLODE does, exposing whole ROW (struct) as a single column. - // Adding extra artificial wrapping ROW with single field simulates Hive semantics in Trino. 
- // - // Example transformation: - // - // Given table with an array of structs column: - // CREATE TABLE example_table(id INTEGER, arr array>) - // We rewrite view defined as: - // SELECT id, arr_exp FROM example_table LATERAL VIEW EXPLODE(arr) t AS arr_exp - // To: - // SELECT "$cor0".id AS id, t1.arr_exp AS arr_exp - // FROM example_table AS "$cor0" - // CROSS JOIN LATERAL (SELECT arr_exp - // FROM UNNEST(TRANSFORM("$cor0".arr, x -> ROW(x))) AS t0 (arr_exp)) AS t1 - // - // The crucial part in above transformation is call to TRANSFORM with lambda which adds extra layer of - // ROW wrapping. - - RelRecordType transformDataType = new RelRecordType( - ImmutableList.of(new RelDataTypeFieldImpl("wrapper_field", 0, unnestCol.getType().getComponentType()))); - - // wrap unnested field to type defined above using transform(field, x -> ROW(x)) - TrinoArrayTransformFunction tranformFunction = new TrinoArrayTransformFunction(transformDataType); - SqlNode fieldRef = x.qualifiedContext().toSql(null, unnestCol); - String fieldRefString = fieldRef.toSqlString(TrinoSqlDialect.INSTANCE).getSql(); - SqlCharStringLiteral transformArgsLiteral = - SqlLiteral.createCharString(String.format("%s, x -> ROW(x)", fieldRefString), POS); - - unnestOperands.add(tranformFunction.createCall(POS, transformArgsLiteral)); - } else { - unnestOperands.add(x.qualifiedContext().toSql(null, unnestCol)); + recordType = unnestCol.getType().getComponentType(); } } - // Build UNNEST() or UNNEST() WITH ORDINALITY - final SqlNode unnestNode = - (e.withOrdinality ? SqlStdOperatorTable.UNNEST_WITH_ORDINALITY : SqlStdOperatorTable.UNNEST).createCall(POS, - unnestOperands); + // Generate SqlCall with Coral's UNNEST Operator and the unnestOperands. 
Also, persist ordinality and operand's data type + final SqlNode unnestCall = + new CoralSqlUnnestOperator(withOrdinality, recordType).createCall(POS, unnestOperands.toArray(new SqlNode[0])); - // Build UNNEST() (WITH ORDINALITY) AS () - final List asOperands = createAsFullOperands(e.getRowType(), unnestNode, x.neededAlias); - final SqlNode asNode = SqlStdOperatorTable.AS.createCall(POS, asOperands); - - // Reuse the same x.neededAlias since that's already unique by directly calling "new Result(...)" + // Reuse the same projectResult.neededAlias since that's already unique by directly calling "new Result(...)" // instead of calling super.result(...), which will generate a new table alias and cause an extra // "AS" to be added to the generated SQL statement and make it invalid. - return new Result(asNode, ImmutableList.of(Clause.FROM), null, e.getRowType(), - ImmutableMap.of(x.neededAlias, e.getRowType())); + return new Result(unnestCall, ImmutableList.of(Clause.FROM), null, e.getRowType(), + com.linkedin.coral.com.google.common.collect.ImmutableMap.of(projectResult.neededAlias, e.getRowType())); } /** @@ -264,25 +266,132 @@ private boolean isTrinoSupportedUnnest(Uncollect uncollect) { return false; } + /** + * Join represents a RelNode with two child relational expressions linked by a join type. + * Super's implementation uses the conversion result of the right child as is. + * When the right child of a Join node is an Uncollect / TableFunction type RelNode, + * this overriding implementation introduces LATERAL and AS operators as parents of the conversion result of the right child. + * + * @param e RelNode of type Join with two child nodes as input. Example: + *
+   *                             LogicalJoin(condition=[true], joinType=[inner])
+   *                                /                                      \
+   *    LogicalTableScan(table=[[hive, default, complex]])              HiveUncollect
+   *                                                                         \
+   *                                                                  LogicalProject(col=[$cor0.c])
+   *                                                                           \
+   *                                                                    LogicalValues(tuples=[[{ 0 }]])
+   *        
+ * + * @return Result of converting the RelNode to a SqlNode. + * The SqlNode generated by converting the above RelNode example is: + *
+   *                                      (SqlCall)
+   *          SqlJoin[`default`.`complex` , LATERAL UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)]
+   *                                         |
+   *                _________________________|_____________________________
+   *               |                         |                            |
+   *  left: `default`.`complex`         joinType: ,       right: LATERAL UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)
+   *
+   *        
+ */ + @Override + public Result visit(Join e) { + Result leftResult = this.visitChild(0, e.getLeft()).resetAlias(); + Result rightResult = this.visitChild(1, e.getRight()).resetAlias(); + Context leftContext = leftResult.qualifiedContext(); + Context rightContext = rightResult.qualifiedContext(); + SqlNode sqlCondition = null; + SqlLiteral condType = JoinConditionType.ON.symbol(POS); + JoinType joinType = joinType(e.getJoinType()); + + if (e.getJoinType() == JoinRelType.INNER && e.getCondition().isAlwaysTrue()) { + joinType = dialect.emulateJoinTypeForCrossJoin(); + condType = JoinConditionType.NONE.symbol(POS); + } else { + sqlCondition = convertConditionToSqlNode(e.getCondition(), leftContext, rightContext, + e.getLeft().getRowType().getFieldCount()); + } + + SqlNode rightSqlNode = rightResult.asFrom(); + + if (e.getRight() instanceof LogicalTableFunctionScan || e.getRight() instanceof Uncollect) { + rightSqlNode = generateRightChildForSqlJoinWithLateralViews(e, rightResult); + } + + SqlNode join = new SqlJoin(POS, leftResult.asFrom(), SqlLiteral.createBoolean(false, POS), joinType.symbol(POS), + rightSqlNode, condType, sqlCondition); + + return result(join, leftResult, rightResult); + } + + /** + * Correlate represents a RelNode with two child relational expressions linked by a join type. + * Super's implementation introduces a LATERAL operator and an AS operator with a single alias as parents of the conversion result of the right child. + * This overriding implementation performs the same operations only when the right child of the Correlate node is an Uncollect / TableFunction type RelNode. + * Moreover, AS operator inserts two aliases - table and column aliases. + * + * @param e RelNode of type Correlate with two child nodes as input. Example: + *
+   *           LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{2}])
+   *                          /                                                  \
+   *    LogicalTableScan(table=[[hive, default, complex]])                   HiveUncollect
+   *                                                                               \
+   *                                                                        LogicalProject(col=[$cor0.c])
+   *                                                                                 \
+   *                                                                          LogicalValues(tuples=[[{ 0 }]])
+   *        
+ * + * @return Result of converting the RelNode to a SqlNode. + * The SqlNode generated by converting the above RelNode example is: + *
+   *                                      (SqlCall)
+   *        SqlJoin[`default`.`complex` , LATERAL UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)]
+   *                                         |
+   *                _________________________|_____________________________
+   *               |                         |                            |
+   *  left: `default`.`complex`         joinType: ,       right: LATERAL UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)
+   *
+   *        
+ */ + @Override public Result visit(Correlate e) { - final Result leftResult = visitChild(0, e.getLeft()).resetAlias(e.getCorrelVariable(), e.getLeft().getRowType()); - parseCorrelTable(e, leftResult); + final Result leftResult = visitChild(0, e.getLeft()).resetAlias(); + + // Add context specifying correlationId has same context as its left child + correlTableMap.put(e.getCorrelationId(), leftResult.qualifiedContext()); + final Result rightResult = visitChild(1, e.getRight()).resetAlias(); - SqlNode rightLateral = rightResult.node; - if (rightLateral.getKind() != SqlKind.AS) { - // LATERAL is only needed in Trino if it's not an AS node. - // For example, "FROM t0 CROSS JOIN UNNEST(yyy) AS t1(col1, col2)" is valid Trino SQL - // without the need of LATERAL keywords. - rightLateral = SqlStdOperatorTable.LATERAL.createCall(POS, rightLateral); - rightLateral = - SqlStdOperatorTable.AS.createCall(POS, rightLateral, new SqlIdentifier(rightResult.neededAlias, POS)); - } - final SqlNode join = new SqlJoin(POS, leftResult.asFrom(), SqlLiteral.createBoolean(false, POS), - JoinType.CROSS.symbol(POS), rightLateral, JoinConditionType.NONE.symbol(POS), null); + SqlNode rightSqlNode = generateRightChildForSqlJoinWithLateralViews(e, rightResult); + + SqlNode join = new SqlJoin(POS, leftResult.asFrom(), SqlLiteral.createBoolean(false, POS), + JoinType.COMMA.symbol(POS), rightSqlNode, JoinConditionType.NONE.symbol(POS), null); + return result(join, leftResult, rightResult); } + private SqlNode generateRightChildForSqlJoinWithLateralViews(BiRel e, Result rightResult) { + SqlNode rightSqlNode = rightResult.asFrom(); + SqlNode lateralNode; + + // Drop the AS operator from the rightSqlNode if it exists and append the LATERAL operator on the inner SqlNode. 
+ if (rightSqlNode instanceof SqlCall && ((SqlCall) rightSqlNode).getOperator().kind == SqlKind.AS) { + lateralNode = SqlStdOperatorTable.LATERAL.createCall(POS, (SqlNode) ((SqlCall) rightSqlNode).operand(0)); + } else { + lateralNode = SqlStdOperatorTable.LATERAL.createCall(POS, rightSqlNode); + } + + // Append the alias to lateralNode by generating SqlCall with AS operator + RelDataType relDataType = e.getRight().getRowType(); + String alias = rightResult.aliases.entrySet().stream().filter(entry -> relDataType.equals(entry.getValue())) + .findFirst().map(Map.Entry::getKey).orElse("coralDefaultColumnAlias"); + + List asOperands = createAsFullOperands(relDataType, lateralNode, alias); + + return SqlStdOperatorTable.AS.createCall(POS, asOperands); + } + /** * Override this method to avoid the duplicated alias for {@link org.apache.calcite.rel.logical.LogicalValues}. * So that for the input SQL like `SELECT 1`, the translated SQL will be like: diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java new file mode 100644 index 000000000..ef2bda3e5 --- /dev/null +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java @@ -0,0 +1,51 @@ +/** + * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.trino.rel2trino.transformers; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; + +import com.linkedin.coral.common.transformers.SqlCallTransformer; + +import static org.apache.calcite.sql.parser.SqlParserPos.*; + + +/** + * This class implements the transformation of SqlCalls with AS operator in format: LATERAL UNNEST(x) AS y (z) + * to their corresponding Trino-compatible versions. + * + * For example, "LATERAL UNNEST(x) AS y (z)" is transformed to "UNNEST(x) AS y (z)" + */ +public class AsOperatorTransformer extends SqlCallTransformer { + + @Override + protected boolean condition(SqlCall sqlCall) { + return sqlCall.getOperator().getKind() == SqlKind.AS && sqlCall.operandCount() > 2 + && sqlCall.operand(0) instanceof SqlBasicCall && sqlCall.operand(0).getKind() == SqlKind.LATERAL; + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + List oldAliasOperands = sqlCall.getOperandList(); + List newAliasOperands = new ArrayList<>(); + SqlCall lateralSqlCall = sqlCall.operand(0); + + // Drop the LATERAL operator when a lateralSqlCall's operand's operator is UNNEST + SqlCall newAliasFirstOperand = + lateralSqlCall.operand(0).getKind() == SqlKind.UNNEST ? 
lateralSqlCall.operand(0) : lateralSqlCall; + + newAliasOperands.add(newAliasFirstOperand); + newAliasOperands.addAll(oldAliasOperands.subList(1, oldAliasOperands.size())); + + return SqlStdOperatorTable.AS.createCall(ZERO, newAliasOperands); + } +} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/JoinSqlCallTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/JoinSqlCallTransformer.java new file mode 100644 index 000000000..ba6c34773 --- /dev/null +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/JoinSqlCallTransformer.java @@ -0,0 +1,103 @@ +/** + * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.trino.rel2trino.transformers; + +import org.apache.calcite.sql.JoinConditionType; +import org.apache.calcite.sql.JoinType; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParserPos; + +import com.linkedin.coral.common.transformers.SqlCallTransformer; + +import static org.apache.calcite.rel.rel2sql.SqlImplementor.*; + + +/** + * This class implements the transformation of SqlCalls with JOIN operator with COMMA JoinType to + * their corresponding Trino-compatible versions. 
+ * + * For example, an input SqlJoin SqlCall: + * + * SqlJoin[`default`.`complex` , UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)] + * | + * _________________________|_____________________________ + * | | | + * left: `default`.`complex` joinType: , right: UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`) + * + * Is transformed to: + * + * SqlJoin[`default`.`complex` CROSS JOIN UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)] + * | + * _________________________|_____________________________ + * | | | + * left: `default`.`complex` joinType: CROSS JOIN right: UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`) + */ +public class JoinSqlCallTransformer extends SqlCallTransformer { + @Override + protected boolean condition(SqlCall sqlCall) { + return sqlCall.getOperator().kind == SqlKind.JOIN && ((SqlJoin) sqlCall).getJoinType() == JoinType.COMMA; + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + SqlJoin joinSqlCall = (SqlJoin) sqlCall; + + // Check if there's an unnest SqlCall present in the nested SqlNodes + if (isUnnestOperatorPresentInRightSqlNode(joinSqlCall.getRight())) { + // Check if the unnest SqlCall is uncorrelated with the SqlJoin SqlCall + if (isUnnestSqlCallCorrelated(joinSqlCall.getRight())) { + // Substitute COMMA JOIN with CROSS JOIN + return createCrossJoinSqlCall(joinSqlCall); + } else { + return joinSqlCall; + } + } else { + // Substitute COMMA JOIN with CROSS JOIN + return createCrossJoinSqlCall(joinSqlCall); + } + } + + /** + * Check if the input sqlNode has a nested SqlCall with UNNEST operator + * @param rightSqlNode right child of a SqlJoin SqlCall + * @return boolean result + */ + private static boolean isUnnestOperatorPresentInRightSqlNode(SqlNode rightSqlNode) { + return rightSqlNode instanceof SqlCall && rightSqlNode.getKind() == SqlKind.AS + && ((SqlCall) rightSqlNode).operand(0) instanceof SqlCall + && ((SqlCall) rightSqlNode).operand(0).getKind() == SqlKind.UNNEST; + } + + private static boolean 
isUnnestSqlCallCorrelated(SqlNode sqlNode) { + SqlNode aliasOperand = ((SqlCall) sqlNode).operand(0); // unnest(x) + SqlNode unnestOperand = ((SqlCall) aliasOperand).operand(0); // x + + // When the unnest operand, 'x', is: + // (1) SqlIdentifier referring to a column, ex: table1.col1 + // (2) SqlCall with "IF" operator for outer unnest + // (3) SqlCall with "TRANSFORM" operator to support unnesting array of structs + // Substitute JoinType with CROSS JoinType. + if (unnestOperand.getKind() == SqlKind.IDENTIFIER + || (unnestOperand instanceof SqlCall + && ((SqlCall) unnestOperand).getOperator().getName().equalsIgnoreCase("transform")) + || (unnestOperand instanceof SqlCall + && ((SqlCall) unnestOperand).getOperator().getName().equalsIgnoreCase("if"))) { + return true; + } + // If the unnest SqlCall is uncorrelated with the SqlJoin, for example, + // when the unnest operand is an inline defined array, do not substitute JoinType + return false; + } + + private static SqlCall createCrossJoinSqlCall(SqlJoin sqlCall) { + return new SqlJoin(POS, (sqlCall).getLeft(), SqlLiteral.createBoolean(false, SqlParserPos.ZERO), + JoinType.CROSS.symbol(POS), (sqlCall).getRight(), JoinConditionType.NONE.symbol(SqlParserPos.ZERO), null); + } +} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/UnnestOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/UnnestOperatorTransformer.java new file mode 100644 index 000000000..6da189285 --- /dev/null +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/UnnestOperatorTransformer.java @@ -0,0 +1,86 @@ +/** + * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.trino.rel2trino.transformers; + +import java.util.ArrayList; +import java.util.Collections; + +import com.google.common.collect.ImmutableList; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFieldImpl; +import org.apache.calcite.rel.type.RelRecordType; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCharStringLiteral; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; + +import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; +import com.linkedin.coral.common.transformers.SqlCallTransformer; +import com.linkedin.coral.trino.rel2trino.TrinoSqlDialect; +import com.linkedin.coral.trino.rel2trino.functions.TrinoArrayTransformFunction; + +import static org.apache.calcite.rel.rel2sql.SqlImplementor.*; + + +/** + * This class implements the transformation of SqlCalls with UNNEST operator to their + * corresponding Trino-compatible versions. + * + * When expanding an array of type struct, Coral IR returns a row set of a single column. This transformer + * wraps the unnest operand with an additional ROW to enable the equivalent operation in Trino. 
+ * + * For example: + * Given table: + * t1(id INTEGER, arr array<struct<sa: int, sb: string>> ) + * and a Coral IR SqlCall: + * UNNEST(arr) + * + * The transformed SqlCall would be: + * UNNEST(TRANSFORM(arr, x -> ROW(x))) + */ +public class UnnestOperatorTransformer extends SqlCallTransformer { + @Override + protected boolean condition(SqlCall sqlCall) { + return sqlCall.getOperator() instanceof CoralSqlUnnestOperator; + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + CoralSqlUnnestOperator operator = (CoralSqlUnnestOperator) sqlCall.getOperator(); + SqlNode unnestOperand = sqlCall.operand(0); + + // Transform UNNEST(fieldName) to UNNEST(TRANSFORM(fieldName, x -> ROW(x))) + if (operator.getRelDataType() != null) { + String fieldName = unnestOperand.toSqlString(TrinoSqlDialect.INSTANCE).getSql(); + + if (unnestOperand instanceof SqlIdentifier) { + SqlIdentifier operand = (SqlIdentifier) unnestOperand; + fieldName = operand.toSqlString(TrinoSqlDialect.INSTANCE).getSql(); + } else if (unnestOperand instanceof SqlCall + && ((SqlCall) unnestOperand).getOperator().getName().equalsIgnoreCase("if")) { + // for trino outer unnest, unnest has an inner SqlCall with "if" operator + fieldName = unnestOperand.toSqlString(TrinoSqlDialect.INSTANCE).getSql(); + } + SqlCharStringLiteral transformArgsLiteral = + SqlLiteral.createCharString(String.format("%s, x -> ROW(x)", fieldName), POS); + + // The crucial part in above transformation is call to TRANSFORM with lambda which adds extra layer of + // ROW wrapping. 
+ // Generate expected recordType required for transformation + RelDataType recordType = operator.getRelDataType(); + RelRecordType transformDataType = + new RelRecordType(ImmutableList.of(new RelDataTypeFieldImpl("wrapper_field", 0, recordType))); + + // wrap unnested field to recordType by calling TRANSFORM with lambda which adds an extra layer of ROW wrapping + // and generates: transform(field, x -> ROW(x)) + unnestOperand = new TrinoArrayTransformFunction(transformDataType).createCall(POS, transformArgsLiteral); + } + + return operator.createCall(POS, new ArrayList<>(Collections.singletonList(unnestOperand)).toArray(new SqlNode[0])); + } +} diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index 871d93321..3fa2b4fd5 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -109,29 +109,29 @@ public Object[][] viewTestCasesProvider() { + "SELECT \"tableq\".\"a\" AS \"a\", CAST(row(\"b\".\"b2\", \"b\".\"b1\", \"b\".\"b0\") as row(\"b2\" double, \"b1\" varchar, \"b0\" integer)) AS \"b\"\n" + "FROM \"test\".\"tableq\" AS \"tableq\"" }, - { "test", "view_with_explode_string_array", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM \"test\".\"table_with_string_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"b\") AS \"t0\" (\"c\")" }, + { "test", "view_with_explode_string_array", "SELECT \"table_with_string_array\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" + + "FROM \"test\".\"table_with_string_array\" AS \"table_with_string_array\"\n" + + "CROSS JOIN UNNEST(\"table_with_string_array\".\"b\") AS \"t0\" (\"c\")" }, - { "test", "view_with_outer_explode_string_array", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM 
\"test\".\"table_with_string_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"if\"(\"$cor0\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"$cor0\".\"b\") AS INTEGER) > 0, \"$cor0\".\"b\", ARRAY[NULL])) AS \"t0\" (\"c\")" }, + { "test", "view_with_outer_explode_string_array", "SELECT \"table_with_string_array\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" + + "FROM \"test\".\"table_with_string_array\" AS \"table_with_string_array\"\n" + + "CROSS JOIN UNNEST(\"if\"(\"table_with_string_array\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"table_with_string_array\".\"b\") AS INTEGER) > 0, \"table_with_string_array\".\"b\", ARRAY[NULL])) AS \"t0\" (\"c\")" }, - { "test", "view_with_explode_struct_array", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM \"test\".\"table_with_struct_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(TRANSFORM(\"$cor0\".\"b\", x -> ROW(x))) AS \"t0\" (\"c\")" }, + { "test", "view_with_explode_struct_array", "SELECT \"table_with_struct_array\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" + + "FROM \"test\".\"table_with_struct_array\" AS \"table_with_struct_array\"\n" + + "CROSS JOIN UNNEST(TRANSFORM(\"table_with_struct_array\".\"b\", x -> ROW(x))) AS \"t0\" (\"c\")" }, - { "test", "view_with_outer_explode_struct_array", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM \"test\".\"table_with_struct_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(TRANSFORM(\"if\"(\"$cor0\".\"b\" IS NOT NULL AND CARDINALITY(\"$cor0\".\"b\") > 0, \"$cor0\".\"b\", ARRAY[NULL]), x -> ROW(x))) AS \"t0\" (\"c\")" }, + { "test", "view_with_outer_explode_struct_array", "SELECT \"table_with_struct_array\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" + + "FROM \"test\".\"table_with_struct_array\" AS \"table_with_struct_array\"\n" + + "CROSS JOIN UNNEST(TRANSFORM(\"if\"(\"table_with_struct_array\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"table_with_struct_array\".\"b\") AS INTEGER) > 0, \"table_with_struct_array\".\"b\", ARRAY[NULL]), x -> ROW(x))) AS \"t0\" 
(\"c\")" }, - { "test", "view_with_explode_map", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\", \"t0\".\"d\" AS \"d\"\n" - + "FROM \"test\".\"table_with_map\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"b\") AS \"t0\" (\"c\", \"d\")" }, + { "test", "view_with_explode_map", "SELECT \"table_with_map\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\", \"t0\".\"d\" AS \"d\"\n" + + "FROM \"test\".\"table_with_map\" AS \"table_with_map\"\n" + + "CROSS JOIN UNNEST(\"table_with_map\".\"b\") AS \"t0\" (\"c\", \"d\")" }, - { "test", "view_with_outer_explode_map", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\", \"t0\".\"d\" AS \"d\"\n" - + "FROM \"test\".\"table_with_map\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"if\"(\"$cor0\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"$cor0\".\"b\") AS INTEGER) > 0, \"$cor0\".\"b\", MAP (ARRAY[NULL], ARRAY[NULL]))) AS \"t0\" (\"c\", \"d\")" }, + { "test", "view_with_outer_explode_map", "SELECT \"table_with_map\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\", \"t0\".\"d\" AS \"d\"\n" + + "FROM \"test\".\"table_with_map\" AS \"table_with_map\"\n" + + "CROSS JOIN UNNEST(\"if\"(\"table_with_map\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"table_with_map\".\"b\") AS INTEGER) > 0, \"table_with_map\".\"b\", MAP (ARRAY[NULL], ARRAY[NULL]))) AS \"t0\" (\"c\", \"d\")" }, { "test", "map_array_view", "SELECT MAP (ARRAY['key1', 'key2'], ARRAY['value1', 'value2']) AS \"simple_map_col\", MAP (ARRAY['key1', 'key2'], ARRAY[MAP (ARRAY['a', 'c'], ARRAY['b', 'd']), MAP (ARRAY['a', 'c'], ARRAY['b', 'd'])]) AS \"nested_map_col\"\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" }, @@ -142,19 +142,19 @@ public Object[][] viewTestCasesProvider() { { "test", "date_function_view", "SELECT \"date\"('2021-01-02') AS \"a\"\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" }, - { "test", "lateral_view_json_tuple_view", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"d\" AS \"d\", \"t0\".\"e\" AS \"e\", \"t0\".\"f\" AS \"f\"\n" - + "FROM \"test\".\"tablea\" AS \"$cor0\"\nCROSS 
JOIN LATERAL (SELECT " - + "\"if\"(\"REGEXP_LIKE\"('trino', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'trino' || '\"]') AS VARCHAR(65535)), NULL) AS \"d\", " - + "\"if\"(\"REGEXP_LIKE\"('always', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'always' || '\"]') AS VARCHAR(65535)), NULL) AS \"e\", " - + "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"" }, + { "test", "lateral_view_json_tuple_view", "SELECT \"tablea\".\"a\" AS \"a\", \"t0\".\"d\" AS \"d\", \"t0\".\"e\" AS \"e\", \"t0\".\"f\" AS \"f\"\n" + + "FROM \"test\".\"tablea\" AS \"tablea\"\nCROSS JOIN LATERAL (SELECT " + + "\"if\"(\"REGEXP_LIKE\"('trino', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'trino' || '\"]') AS VARCHAR(65535)), NULL) AS \"d\", " + + "\"if\"(\"REGEXP_LIKE\"('always', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'always' || '\"]') AS VARCHAR(65535)), NULL) AS \"e\", " + + "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n" + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\" (\"d\", \"e\", \"f\")" }, - { "test", "lateral_view_json_tuple_view_qualified", "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"d\" AS \"d\", \"t0\".\"e\" AS \"e\", \"t0\".\"f\" AS \"f\"\n" - + "FROM \"test\".\"tablea\" AS \"$cor0\"\nCROSS JOIN LATERAL (SELECT " - + "\"if\"(\"REGEXP_LIKE\"('trino', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'trino' || '\"]') AS VARCHAR(65535)), NULL) AS \"d\", " - + "\"if\"(\"REGEXP_LIKE\"('always', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'always' || '\"]') AS VARCHAR(65535)), NULL) AS \"e\", " - + "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), 
CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"" }, + { "test", "lateral_view_json_tuple_view_qualified", "SELECT \"tablea\".\"a\" AS \"a\", \"t0\".\"d\" AS \"d\", \"t0\".\"e\" AS \"e\", \"t0\".\"f\" AS \"f\"\n" + + "FROM \"test\".\"tablea\" AS \"tablea\"\nCROSS JOIN LATERAL (SELECT " + + "\"if\"(\"REGEXP_LIKE\"('trino', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'trino' || '\"]') AS VARCHAR(65535)), NULL) AS \"d\", " + + "\"if\"(\"REGEXP_LIKE\"('always', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'always' || '\"]') AS VARCHAR(65535)), NULL) AS \"e\", " + + "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), CAST(\"json_extract\"(\"tablea\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n" + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\" (\"d\", \"e\", \"f\")" }, { "test", "get_json_object_view", "SELECT \"json_extract\"(\"tablea\".\"b\".\"b1\", '$.name')\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" }, @@ -201,8 +201,7 @@ public void testLateralViewArray() { RelNode relNode = TestUtils.getHiveToRelConverter() .convertSql("SELECT col FROM (SELECT ARRAY('a1', 'a2') as a) tmp LATERAL VIEW EXPLODE(a) a_alias AS col"); String targetSql = "SELECT \"t2\".\"col\" AS \"col\"\n" + "FROM (SELECT ARRAY['a1', 'a2'] AS \"a\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"a\") AS \"t2\" (\"col\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + "CROSS JOIN UNNEST(\"t0\".\"a\") AS \"t2\" (\"col\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -229,8 +228,7 @@ public void testLateralViewArrayWithoutColumns() { RelNode relNode = TestUtils.getHiveToRelConverter() .convertSql("SELECT col FROM (SELECT ARRAY('a1', 'a2') as a) 
tmp LATERAL VIEW EXPLODE(a) a_alias"); String targetSql = "SELECT \"t2\".\"col\" AS \"col\"\n" + "FROM (SELECT ARRAY['a1', 'a2'] AS \"a\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"a\") AS \"t2\" (\"col\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + "CROSS JOIN UNNEST(\"t0\".\"a\") AS \"t2\" (\"col\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -243,8 +241,8 @@ public void testLateralViewMap() { "SELECT key, value FROM (SELECT MAP('key1', 'value1') as m) tmp LATERAL VIEW EXPLODE(m) m_alias AS key, value"); String targetSql = "SELECT \"t2\".\"key\" AS \"key\", \"t2\".\"value\" AS \"value\"\n" + "FROM (SELECT MAP (ARRAY['key1'], ARRAY['value1']) AS \"m\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"m\") AS \"t2\" (\"key\", \"value\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + + "CROSS JOIN UNNEST(\"t0\".\"m\") AS \"t2\" (\"key\", \"value\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -257,8 +255,8 @@ public void testLateralViewMapWithoutAlias() { .convertSql("SELECT key, value FROM (SELECT MAP('key1', 'value1') as m) tmp LATERAL VIEW EXPLODE(m) m_alias"); String targetSql = "SELECT \"t2\".\"KEY\" AS \"key\", \"t2\".\"VALUE\" AS \"value\"\n" + "FROM (SELECT MAP (ARRAY['key1'], ARRAY['value1']) AS \"m\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"m\") AS \"t2\" (\"KEY\", \"VALUE\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + + "CROSS JOIN UNNEST(\"t0\".\"m\") AS \"t2\" (\"KEY\", \"VALUE\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -270,8 +268,8 @@ public void 
testLateralViewPosExplodeWithAlias() { RelNode relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT col FROM (SELECT ARRAY('a1', 'a2') as a) tmp LATERAL VIEW POSEXPLODE(a) a_alias AS pos, col"); String targetSql = "SELECT \"t2\".\"col\" AS \"col\"\n" + "FROM (SELECT ARRAY['a1', 'a2'] AS \"a\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"a\") WITH ORDINALITY AS \"t2\" (\"col\", \"pos\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + + "CROSS JOIN UNNEST(\"t0\".\"a\") WITH ORDINALITY AS \"t2\" (\"col\", \"pos\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -283,8 +281,8 @@ public void testLateralViewPosExplodeWithoutAlias() { RelNode relNode = TestUtils.getHiveToRelConverter() .convertSql("SELECT col FROM (SELECT ARRAY('a1', 'a2') as a) tmp LATERAL VIEW POSEXPLODE(a) a_alias"); String targetSql = "SELECT \"t2\".\"col\" AS \"col\"\n" + "FROM (SELECT ARRAY['a1', 'a2'] AS \"a\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"a\") WITH ORDINALITY AS \"t2\" (\"col\", \"ORDINALITY\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + + "CROSS JOIN UNNEST(\"t0\".\"a\") WITH ORDINALITY AS \"t2\" (\"col\", \"ORDINALITY\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); @@ -296,40 +294,14 @@ public void testLateralViewOuterPosExplodeWithAlias() { RelNode relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT col FROM (SELECT ARRAY('a1', 'a2') as a) tmp LATERAL VIEW OUTER POSEXPLODE(a) a_alias AS pos, col"); String targetSql = "SELECT \"t2\".\"col\" AS \"col\"\n" + "FROM (SELECT ARRAY['a1', 'a2'] AS \"a\"\n" - + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"if\"(\"$cor0\".\"a\" IS NOT NULL AND 
CAST(CARDINALITY(\"$cor0\".\"a\") AS INTEGER) > 0, \"$cor0\".\"a\", ARRAY[NULL])) WITH ORDINALITY AS \"t2\" (\"col\", \"pos\")"; + + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"\n" + + "CROSS JOIN UNNEST(\"if\"(\"t0\".\"a\" IS NOT NULL AND CAST(CARDINALITY(\"t0\".\"a\") AS INTEGER) > 0, \"t0\".\"a\", ARRAY[NULL])) WITH ORDINALITY AS \"t2\" (\"col\", \"pos\")"; RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); String expandedSql = relToTrinoConverter.convert(relNode); assertEquals(expandedSql, targetSql); } - @Test - public void testLegacyUnnestArrayOfStruct() { - RelNode relNode = TestUtils.getHiveToRelConverter().convertView("test", "view_with_explode_struct_array"); - String targetSql = "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM \"test\".\"table_with_struct_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"b\") AS \"t0\" (\"c\")"; - - RelToTrinoConverter relToTrinoConverter = - TestUtils.getRelToTrinoConverter(ImmutableMap.of(SUPPORT_LEGACY_UNNEST_ARRAY_OF_STRUCT, true)); - String expandedSql = relToTrinoConverter.convert(relNode); - assertEquals(expandedSql, targetSql); - } - - @Test - public void testLegacyOuterUnnestArrayOfStruct() { - RelNode relNode = TestUtils.getHiveToRelConverter().convertView("test", "view_with_outer_explode_struct_array"); - String targetSql = "SELECT \"$cor0\".\"a\" AS \"a\", \"t0\".\"c\" AS \"c\"\n" - + "FROM \"test\".\"table_with_struct_array\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"if\"(\"$cor0\".\"b\" IS NOT NULL AND CAST(CARDINALITY(\"$cor0\".\"b\") AS INTEGER) > 0, \"$cor0\".\"b\", ARRAY[NULL])) AS \"t0\" (\"c\")"; - - RelToTrinoConverter relToTrinoConverter = - TestUtils.getRelToTrinoConverter(ImmutableMap.of(SUPPORT_LEGACY_UNNEST_ARRAY_OF_STRUCT, true)); - String expandedSql = relToTrinoConverter.convert(relNode); - assertEquals(expandedSql, targetSql); - } - @Test public void testAvoidTransformToDate() { RelNode relNode = TestUtils.getHiveToRelConverter() 
diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverterTest.java index cc91f7985..83b860f55 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverterTest.java @@ -266,9 +266,9 @@ public void testUnnestConstant() { @Test public void testLateralViewUnnest() { String sql = "select icol, acol_elem from test.tableOne LATERAL VIEW explode(acol) t1 AS acol_elem"; - String expectedSql = "SELECT \"$cor0\".\"icol\" AS \"icol\", \"t0\".\"acol_elem\" AS \"acol_elem\"\n" - + "FROM \"test\".\"tableone\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"acol\") AS \"t0\" (\"acol_elem\")"; + String expectedSql = "SELECT \"tableone\".\"icol\" AS \"icol\", \"t0\".\"acol_elem\" AS \"acol_elem\"\n" + + "FROM \"test\".\"tableone\" AS \"tableone\"\n" + + "CROSS JOIN UNNEST(\"tableone\".\"acol\") AS \"t0\" (\"acol_elem\")"; testConversion(sql, expectedSql); } diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/trino2rel/TrinoToRelConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/trino2rel/TrinoToRelConverterTest.java index 883a656fe..da8ca31fb 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/trino2rel/TrinoToRelConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/trino2rel/TrinoToRelConverterTest.java @@ -127,55 +127,38 @@ public Iterator getSupportedSql() { + " LogicalTableScan(table=[[hive, default, my_table]])\n", "SELECT element_at(\"my_table\".\"x\", CAST(10 * SIN(\"my_table\".\"z\") AS BIGINT))\n" + "FROM \"default\".\"my_table\" AS \"my_table\"")) - .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3])", - "LogicalProject(EXPR$0=[$0])\n" + " HiveUncollect\n" + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" - + " LogicalValues(tuples=[[{ 
0 }]])\n", - "SELECT \"t0\".\"col\" AS \"col\"\n" + "FROM UNNEST(ARRAY[1, 2, 3]) AS \"t0\" (\"col\")")) - .add(new TrinoToRelTestDataProvider("select x from unnest(array[1, 2, 3]) t(x)", - "LogicalProject(X=[$0])\n" + " HiveUncollect\n" + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" - + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT *\n" + "FROM UNNEST(ARRAY[1, 2, 3]) AS \"t0\" (\"X\")")) .add(new TrinoToRelTestDataProvider("select * from my_table cross join unnest(x)", "LogicalProject(x=[$0], y=[$1], z=[$2], EXPR$0=[$3])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n" + " LogicalTableScan(table=[[hive, default, my_table]])\n" + " HiveUncollect\n" + " LogicalProject(col=[$cor0.x])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT \"$cor0\".\"x\" AS \"x\", \"$cor0\".\"y\" AS \"y\", \"$cor0\".\"z\" AS \"z\", \"t0\".\"col\" AS \"col\"\n" - + "FROM \"default\".\"my_table\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"x\") AS \"t0\" (\"col\")")) + "SELECT \"my_table\".\"x\" AS \"x\", \"my_table\".\"y\" AS \"y\", \"my_table\".\"z\" AS \"z\", \"t0\".\"col\" AS \"col\"\n" + + "FROM \"default\".\"my_table\" AS \"my_table\"\n" + + "CROSS JOIN UNNEST(\"my_table\".\"x\") AS \"t0\" (\"col\")")) .add(new TrinoToRelTestDataProvider("select z from my_table cross join unnest(x) t(x_)", "LogicalProject(Z=[$2])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n" + " LogicalTableScan(table=[[hive, default, my_table]])\n" + " HiveUncollect\n" + " LogicalProject(col=[$cor0.x])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT \"$cor0\".\"z\" AS \"Z\"\n" + "FROM \"default\".\"my_table\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"x\") AS \"t0\" (\"X_\")")) - .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3]) with ordinality", - "LogicalProject(EXPR$0=[$0], ORDINALITY=[$1])\n" + " HiveUncollect(withOrdinality=[true])\n" - + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + " 
LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT \"t0\".\"col\" AS \"col\", \"t0\".\"ORDINALITY\" AS \"ORDINALITY\"\n" - + "FROM UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS \"t0\" (\"col\", \"ORDINALITY\")")) - .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3]) with ordinality t(x, y)", - "LogicalProject(X=[$0], Y=[$1])\n" + " HiveUncollect(withOrdinality=[true])\n" - + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT *\n" + "FROM UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS \"t0\" (\"X\", \"Y\")")) + "SELECT \"my_table\".\"z\" AS \"Z\"\n" + "FROM \"default\".\"my_table\" AS \"my_table\"\n" + + "CROSS JOIN UNNEST(\"my_table\".\"x\") AS \"t0\" (\"X_\")")) .add(new TrinoToRelTestDataProvider("select * from my_table cross join unnest(x) with ordinality", "LogicalProject(x=[$0], y=[$1], z=[$2], EXPR$0=[$3], ORDINALITY=[$4])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n" + " LogicalTableScan(table=[[hive, default, my_table]])\n" + " HiveUncollect(withOrdinality=[true])\n" + " LogicalProject(col=[$cor0.x])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT \"$cor0\".\"x\" AS \"x\", \"$cor0\".\"y\" AS \"y\", \"$cor0\".\"z\" AS \"z\", \"t0\".\"col\" AS \"col\", \"t0\".\"ORDINALITY\" AS \"ORDINALITY\"\n" - + "FROM \"default\".\"my_table\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"x\") WITH ORDINALITY AS \"t0\" (\"col\", \"ORDINALITY\")")) + "SELECT \"my_table\".\"x\" AS \"x\", \"my_table\".\"y\" AS \"y\", \"my_table\".\"z\" AS \"z\", \"t0\".\"col\" AS \"col\", \"t0\".\"ORDINALITY\" AS \"ORDINALITY\"\n" + + "FROM \"default\".\"my_table\" AS \"my_table\"\n" + + "CROSS JOIN UNNEST(\"my_table\".\"x\") WITH ORDINALITY AS \"t0\" (\"col\", \"ORDINALITY\")")) .add(new TrinoToRelTestDataProvider("select z from my_table cross join unnest(x) with ordinality t(a, b)", "LogicalProject(Z=[$2])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], 
requiredColumns=[{0}])\n" + " LogicalTableScan(table=[[hive, default, my_table]])\n" + " HiveUncollect(withOrdinality=[true])\n" + " LogicalProject(col=[$cor0.x])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", - "SELECT \"$cor0\".\"z\" AS \"Z\"\n" + "FROM \"default\".\"my_table\" AS \"$cor0\"\n" - + "CROSS JOIN UNNEST(\"$cor0\".\"x\") WITH ORDINALITY AS \"t0\" (\"A\", \"B\")")) + "SELECT \"my_table\".\"z\" AS \"Z\"\n" + "FROM \"default\".\"my_table\" AS \"my_table\"\n" + + "CROSS JOIN UNNEST(\"my_table\".\"x\") WITH ORDINALITY AS \"t0\" (\"A\", \"B\")")) .add(new TrinoToRelTestDataProvider( "with a (id) as (with x as (select 123 from foo) select * from x) , b (id) as (select 999 from foo) select * from a join b using (id)", "LogicalProject(ID=[COALESCE($0, $1)])\n" + " LogicalJoin(condition=[=($0, $1)], joinType=[inner])\n" @@ -256,4 +239,41 @@ public void testSupport(String trinoSql, String expectedRelString, String expect assertEquals(expectedSql, expandedSql); } + @DataProvider(name = "Unsupported") + public Iterator getUnsupportedSql() { + return ImmutableList. 
builder() + .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3])", + "LogicalProject(EXPR$0=[$0])\n" + " HiveUncollect\n" + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n", + "SELECT \"col\"\n" + "FROM UNNEST(ARRAY[1, 2, 3]) AS \"t0\" (\"col\")")) + .add(new TrinoToRelTestDataProvider("select x from unnest(array[1, 2, 3]) t(x)", + "LogicalProject(X=[$0])\n" + " HiveUncollect\n" + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n", + "SELECT \"X\"\n" + "FROM UNNEST(ARRAY[1, 2, 3]) AS \"t0\" (\"X\")")) + .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3]) with ordinality", + "LogicalProject(EXPR$0=[$0], ORDINALITY=[$1])\n" + " HiveUncollect(withOrdinality=[true])\n" + + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", + "SELECT \"col\", \"ORDINALITY\"\n" + + "FROM UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS \"t0\" (\"col\", \"ORDINALITY\")")) + .add(new TrinoToRelTestDataProvider("select * from unnest(array[1, 2, 3]) with ordinality t(x, y)", + "LogicalProject(X=[$0], Y=[$1])\n" + " HiveUncollect(withOrdinality=[true])\n" + + " LogicalProject(col=[ARRAY(1, 2, 3)])\n" + " LogicalValues(tuples=[[{ 0 }]])\n", + "SELECT \"X\", \"Y\"\n" + "FROM UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS \"t0\" (\"X\", \"Y\")")) + .add(new TrinoToRelTestDataProvider( + "SELECT * from default.table_with_struct_arr cross join unnest(struct.b) AS t(b1col, b2col)", null, null)) + .build().stream().map(x -> new Object[] { x.trinoSql, x.expectedRelString, x.expectedSql }).iterator(); + } + + @Test(dataProvider = "Unsupported", enabled = false, + description = "Input Trino SQLs which do not conform to a valid Coral IR representation") + public void testUnsupported(String trinoSql, String expectedRelString, String expectedSql) { + RelNode relNode = getTrinoToRelConverter().convertSql(trinoSql); + assertEquals(relToStr(relNode), expectedRelString); + + 
RelToTrinoConverter relToTrinoConverter = getRelToTrinoConverter(); + // Convert rel node back to Sql + String expandedSql = relToTrinoConverter.convert(relNode); + assertEquals(expectedSql, expandedSql); + } + }