Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Coral-Trino] Migrate UNNEST From RelToTrinoConverter to SqlCallConverter layer #428

Merged
merged 5 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@
import com.linkedin.coral.hive.hive2rel.functions.HiveRLikeOperator;
import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry;
import com.linkedin.coral.trino.rel2trino.functions.TrinoElementAtFunction;
import com.linkedin.coral.trino.rel2trino.transformers.AsOperatorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CollectListOrSetFunctionTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CurrentTimestampTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.GenericCoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.JoinSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.MapValueConstructorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.ReturnTypeAdjustmentTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.SqlSelectAliasAppenderTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.ToDateOperatorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.UnnestOperatorTransformer;

import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;

Expand Down Expand Up @@ -120,7 +123,8 @@ protected SqlCall transform(SqlCall sqlCall) {
"com.linkedin.stdudfs.hive.daliudfs.UrnExtractorFunctionWrapper", 1, "urn_extractor"),
new GenericCoralRegistryOperatorRenameSqlCallTransformer(),

new ReturnTypeAdjustmentTransformer(configs));
new ReturnTypeAdjustmentTransformer(configs), new UnnestOperatorTransformer(), new AsOperatorTransformer(),
aastha25 marked this conversation as resolved.
Show resolved Hide resolved
new JoinSqlCallTransformer());
}

private SqlOperator hiveToCoralSqlOperator(String functionName) {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino.transformers;

import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;

import com.linkedin.coral.common.transformers.SqlCallTransformer;

import static org.apache.calcite.sql.parser.SqlParserPos.*;


/**
 * Rewrites aliased lateral calls of the form: LATERAL UNNEST(x) AS y (z)
 * into their Trino-compatible equivalent.
 *
 * For example, "LATERAL UNNEST(x) AS y (z)" is transformed to "UNNEST(x) AS y (z)"
 */
public class AsOperatorTransformer extends SqlCallTransformer {

  /**
   * Matches SqlCalls whose operator kind is AS, that carry more than two operands, and whose first
   * operand is a {@link SqlBasicCall} of kind LATERAL.
   */
  @Override
  protected boolean condition(SqlCall sqlCall) {
    if (sqlCall.getOperator().getKind() != SqlKind.AS || sqlCall.operandCount() <= 2) {
      return false;
    }
    SqlNode aliasedNode = sqlCall.operand(0);
    return aliasedNode instanceof SqlBasicCall && aliasedNode.getKind() == SqlKind.LATERAL;
  }

  /**
   * Builds a new AS call in which the leading LATERAL call is replaced by its own first operand when
   * that operand is an UNNEST call; every other operand of the alias is carried over unchanged.
   */
  @Override
  protected SqlCall transform(SqlCall sqlCall) {
    SqlCall lateralCall = sqlCall.operand(0);

    // Keep the LATERAL wrapper unless it directly wraps an UNNEST call
    SqlCall aliasTarget = lateralCall;
    if (lateralCall.operand(0).getKind() == SqlKind.UNNEST) {
      aliasTarget = lateralCall.operand(0);
    }

    // Copy the alias operands and swap only the first one
    List<SqlNode> aliasOperands = new ArrayList<>(sqlCall.getOperandList());
    aliasOperands.set(0, aliasTarget);

    return SqlStdOperatorTable.AS.createCall(ZERO, aliasOperands);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino.transformers;

import org.apache.calcite.sql.JoinConditionType;
import org.apache.calcite.sql.JoinType;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlJoin;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParserPos;

import com.linkedin.coral.common.transformers.SqlCallTransformer;

import static org.apache.calcite.rel.rel2sql.SqlImplementor.*;


/**
 * Transforms SqlJoin SqlCalls whose join type is COMMA into their Trino-compatible versions.
 *
 * For example, an input SqlJoin SqlCall:
 *
 *   SqlJoin[`default`.`complex` , UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)]
 *     left:     `default`.`complex`
 *     joinType: ,
 *     right:    UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)
 *
 * Is transformed to:
 *
 *   SqlJoin[`default`.`complex` CROSS JOIN UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)]
 *     left:     `default`.`complex`
 *     joinType: CROSS JOIN
 *     right:    UNNEST(`complex`.`c`) AS `t_alias` (`col_alias`)
 */
public class JoinSqlCallTransformer extends SqlCallTransformer {

  /**
   * Matches SqlCalls whose operator kind is JOIN and whose join type is COMMA.
   */
  @Override
  protected boolean condition(SqlCall sqlCall) {
    if (sqlCall.getOperator().kind != SqlKind.JOIN) {
      return false;
    }
    return ((SqlJoin) sqlCall).getJoinType() == JoinType.COMMA;
  }

  /**
   * Replaces the COMMA join with a CROSS JOIN, except when the right side is an aliased UNNEST whose
   * operand is not recognised as correlated; in that case the join is returned untouched.
   */
  @Override
  protected SqlCall transform(SqlCall sqlCall) {
    SqlJoin join = (SqlJoin) sqlCall;
    SqlNode rightSide = join.getRight();

    // An aliased UNNEST over an uncorrelated operand (e.g. an inline defined array) keeps its COMMA join
    if (isAliasedUnnest(rightSide) && !hasCorrelatedUnnestOperand(rightSide)) {
      return join;
    }

    // Everything else: substitute COMMA JOIN with CROSS JOIN
    return toCrossJoin(join);
  }

  /**
   * Tells whether the given node is an AS call whose first operand is a SqlCall of kind UNNEST.
   * @param rightSqlNode right child of a SqlJoin SqlCall
   * @return boolean result
   */
  private static boolean isAliasedUnnest(SqlNode rightSqlNode) {
    if (!(rightSqlNode instanceof SqlCall) || rightSqlNode.getKind() != SqlKind.AS) {
      return false;
    }
    SqlNode aliased = ((SqlCall) rightSqlNode).operand(0);
    return aliased instanceof SqlCall && aliased.getKind() == SqlKind.UNNEST;
  }

  /**
   * Tells whether the operand 'x' of an aliased "UNNEST(x) AS ..." node is treated as correlated, i.e. 'x' is:
   * (1) SqlIdentifier referring to a column, ex: table1.col1
   * (2) SqlCall with "IF" operator for outer unnest
   * (3) SqlCall with "TRANSFORM" operator to support unnesting array of structs
   * @param sqlNode an AS call wrapping an UNNEST call
   * @return true for the three shapes above, false otherwise
   */
  private static boolean hasCorrelatedUnnestOperand(SqlNode sqlNode) {
    SqlNode unnestCall = ((SqlCall) sqlNode).operand(0); // unnest(x)
    SqlNode unnestOperand = ((SqlCall) unnestCall).operand(0); // x

    if (unnestOperand.getKind() == SqlKind.IDENTIFIER) {
      return true;
    }
    if (!(unnestOperand instanceof SqlCall)) {
      return false;
    }
    String operatorName = ((SqlCall) unnestOperand).getOperator().getName();
    return operatorName.equalsIgnoreCase("transform") || operatorName.equalsIgnoreCase("if");
  }

  /**
   * Creates a CROSS JOIN over the left and right children of the given join, with no join condition.
   */
  private static SqlCall toCrossJoin(SqlJoin sqlCall) {
    SqlNode left = sqlCall.getLeft();
    SqlNode right = sqlCall.getRight();
    SqlLiteral notNatural = SqlLiteral.createBoolean(false, SqlParserPos.ZERO);
    SqlLiteral crossJoinType = JoinType.CROSS.symbol(POS);
    SqlLiteral noCondition = JoinConditionType.NONE.symbol(SqlParserPos.ZERO);
    return new SqlJoin(POS, left, notNatural, crossJoinType, right, noCondition, null);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino.transformers;

import java.util.ArrayList;
import java.util.Collections;

import com.google.common.collect.ImmutableList;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFieldImpl;
import org.apache.calcite.rel.type.RelRecordType;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlCharStringLiteral;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;

import com.linkedin.coral.common.functions.CoralSqlUnnestOperator;
import com.linkedin.coral.common.transformers.SqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.TrinoSqlDialect;
import com.linkedin.coral.trino.rel2trino.functions.TrinoArrayTransformFunction;

import static org.apache.calcite.rel.rel2sql.SqlImplementor.*;


/**
* This class implements the transformation of SqlCalls with UNNEST operator to their
* corresponding Trino-compatible versions.
*
* When expanding an array of type struct, Coral IR returns a row set of a single column. This transformer
* wraps the unnest operand with an additional ROW to enable the equivalent operation in Trino.
*
* For example:
* Given table:
* t1(id INTEGER, arr array&lt;struct&lt;sa: int, sb: string&gt;&gt; )
* and a Coral IR SqlCall:
* UNNEST(arr)
*
* The transformed SqlCall would be:
* UNNEST(TRANSFORM(arr, x -&gt; ROW(x)))
*/
public class UnnestOperatorTransformer extends SqlCallTransformer {
@Override
protected boolean condition(SqlCall sqlCall) {
return sqlCall.getOperator() instanceof CoralSqlUnnestOperator;
}

@Override
protected SqlCall transform(SqlCall sqlCall) {
CoralSqlUnnestOperator operator = (CoralSqlUnnestOperator) sqlCall.getOperator();
SqlNode unnestOperand = sqlCall.operand(0);

// Transform UNNEST(fieldName) to UNNEST(TRANSFORM(fieldName, x -> ROW(x)))
if (operator.getRelDataType() != null) {
String fieldName = "empty";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the initial value of fieldName is "empty"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modified it to String fieldName = unnestOperand.toSqlString(TrinoSqlDialect.INSTANCE).getSql();
Probably the if - else if section below it can be removed but I'll get to that in a follow-up PR after i-testing again. Thanks for bringing it up.


if (unnestOperand instanceof SqlIdentifier) {
SqlIdentifier operand = (SqlIdentifier) unnestOperand;
fieldName = operand.toSqlString(TrinoSqlDialect.INSTANCE).getSql();
} else if (unnestOperand instanceof SqlCall
&& ((SqlCall) unnestOperand).getOperator().getName().equalsIgnoreCase("if")) {
// for trino outer unnest, unnest has an inner SqlCall with "if" operator
fieldName = unnestOperand.toSqlString(TrinoSqlDialect.INSTANCE).getSql();
}
SqlCharStringLiteral transformArgsLiteral =
SqlLiteral.createCharString(String.format("%s, x -> ROW(x)", fieldName), POS);

// The crucial part in above transformation is call to TRANSFORM with lambda which adds extra layer of
// ROW wrapping.
// Generate expected recordType required for transformation
RelDataType recordType = operator.getRelDataType();
RelRecordType transformDataType =
new RelRecordType(ImmutableList.of(new RelDataTypeFieldImpl("wrapper_field", 0, recordType)));

// wrap unnested field to recordType by calling TRANSFORM with lambda which adds an extra layer of ROW wrapping
// and generates: transform(field, x -> ROW(x))
unnestOperand = new TrinoArrayTransformFunction(transformDataType).createCall(POS, transformArgsLiteral);
}

return operator.createCall(POS, new ArrayList<>(Collections.singletonList(unnestOperand)).toArray(new SqlNode[0]));
}
}
Loading