From cb7a452836af27dba56ca43d72c9171d8bca76bf Mon Sep 17 00:00:00 2001 From: Walaa Eldin Moustafa Date: Fri, 19 May 2023 14:10:44 -0700 Subject: [PATCH 1/4] [Coral-Hive] Simply IN operator --- .../hive/hive2rel/HiveConvertletTable.java | 22 +------ .../hive/hive2rel/functions/HiveFunction.java | 12 ++-- .../hive2rel/functions/HiveInOperator.java | 61 +++++++++---------- 3 files changed, 35 insertions(+), 60 deletions(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveConvertletTable.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveConvertletTable.java index e5a8ed35d..2d47097a0 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveConvertletTable.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveConvertletTable.java @@ -1,5 +1,5 @@ /** - * Copyright 2018-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2018-2023 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -8,14 +8,11 @@ import java.util.ArrayList; import java.util.List; -import com.google.common.base.Preconditions; - import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlDataTypeSpec; import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlNodeList; import org.apache.calcite.sql.fun.SqlCastFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql2rel.ReflectiveConvertletTable; @@ -23,9 +20,7 @@ import org.apache.calcite.sql2rel.SqlRexConvertlet; import org.apache.calcite.sql2rel.StandardConvertletTable; -import com.linkedin.coral.com.google.common.collect.ImmutableList; import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; -import com.linkedin.coral.hive.hive2rel.functions.HiveInOperator; import com.linkedin.coral.hive.hive2rel.functions.HiveNamedStructFunction; @@ -46,21 +41,6 @@ public RexNode convertNamedStruct(SqlRexContext cx, HiveNamedStructFunction func return cx.getRexBuilder().makeCast(retType, rowNode); } - @SuppressWarnings("unused") - public RexNode convertHiveInOperator(SqlRexContext cx, HiveInOperator operator, SqlCall call) { - List operandList = call.getOperandList(); - Preconditions.checkState(operandList.size() == 2 && operandList.get(1) instanceof SqlNodeList); - RexNode lhs = cx.convertExpression(operandList.get(0)); - SqlNodeList rhsNodes = (SqlNodeList) operandList.get(1); - ImmutableList.Builder rexNodes = ImmutableList. builder().add(lhs); - for (int i = 0; i < rhsNodes.size(); i++) { - rexNodes.add(cx.convertExpression(rhsNodes.get(i))); - } - - RelDataType retType = cx.getValidator().getValidatedNodeType(call); - return cx.getRexBuilder().makeCall(retType, HiveInOperator.IN, rexNodes.build()); - } - @SuppressWarnings("unused") public RexNode convertFunctionFieldReferenceOperator(SqlRexContext cx, FunctionFieldReferenceOperator op, SqlCall call) { diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java index 13e2beffc..fb09b41bc 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveFunction.java @@ -1,5 +1,5 @@ /** - * Copyright 2018-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2018-2023 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -96,18 +96,14 @@ public SqlCall createCall(SqlNode function, List operands, SqlLiteral q @Override public SqlCall createCall(SqlNode function, List operands, SqlLiteral qualifier) { checkState(operands.size() >= 2); - SqlNode lhs = operands.get(0); - SqlNode firstRhs = operands.get(1); - if (firstRhs instanceof SqlSelect) { + if (operands.get(1) instanceof SqlSelect) { // for IN subquery use Calcite IN operator. Calcite IN operator // will turn it into inner join, which not ideal but that's better // tested. return SqlStdOperatorTable.IN.createCall(ZERO, operands); } else { - // column IN values () clause - List rhsList = operands.subList(1, operands.size()); - SqlNodeList rhs = new SqlNodeList(rhsList, ZERO); - return getSqlOperator().createCall(ZERO, lhs, rhs); + // For IN whose operand is a list of values, we use custom IN operator {@link HiveInOperator}. + return getSqlOperator().createCall(ZERO, operands); } } }; diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java index 80e2d1017..ba6b5f4ee 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java @@ -1,5 +1,5 @@ /** - * Copyright 2018-2021 LinkedIn Corporation. All rights reserved. + * Copyright 2018-2023 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -34,6 +34,7 @@ import org.apache.calcite.sql.validate.SqlValidatorImpl; import org.apache.calcite.sql.validate.SqlValidatorScope; +import static org.apache.calcite.sql.parser.SqlParserPos.ZERO; import static org.apache.calcite.util.Static.*; @@ -47,6 +48,8 @@ * different from set of OR predicates * 2. In some cases, calcite turns IN clause to INNER JOIN query on a set of values. We * again want to prevent this conversion. + * Unlike Calcite's IN operator, whose operands are (expr, exprList), this operator's operands + * are (expr, exprList[0], expr[1], ...), where (exp) is the expression preceding IN operator. */ public class HiveInOperator extends SqlSpecialOperator { @@ -73,46 +76,42 @@ public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) writer.endList(listFrame); } - // copied from Calcite' SqlInOperator + // Adapts the parent's behavior to allow IN to have any number of arguments (i.e., list elements). public RelDataType deriveType(SqlValidator validator, SqlValidatorScope scope, SqlCall call) { final List operands = call.getOperandList(); - assert operands.size() == 2; final SqlNode left = operands.get(0); - final SqlNode right = operands.get(1); + final SqlNodeList right = new SqlNodeList(ZERO); + for (int i = 1; i < operands.size(); i++) { + right.add(operands.get(i)); + } final RelDataTypeFactory typeFactory = validator.getTypeFactory(); RelDataType leftType = validator.deriveType(scope, left); RelDataType rightType; // Derive type for RHS. - if (right instanceof SqlNodeList) { - // Handle the 'IN (expr, ...)' form. - List rightTypeList = new ArrayList<>(); - SqlNodeList nodeList = (SqlNodeList) right; - for (int i = 0; i < nodeList.size(); i++) { - SqlNode node = nodeList.get(i); - RelDataType nodeType = validator.deriveType(scope, node); - rightTypeList.add(nodeType); - } - rightType = typeFactory.leastRestrictive(rightTypeList); - - // First check that the expressions in the IN list are compatible - // with each other. Same rules as the VALUES operator (per - // SQL:2003 Part 2 Section 8.4, ). - if (null == rightType && validator.isTypeCoercionEnabled()) { - // Do implicit type cast if it is allowed to. - rightType = validator.getTypeCoercion().getWiderTypeFor(rightTypeList, true); - } - if (null == rightType) { - throw validator.newValidationError(right, RESOURCE.incompatibleTypesInList()); - } - - // Record the RHS type for use by SqlToRelConverter. - ((SqlValidatorImpl) validator).setValidatedNodeType(nodeList, rightType); - } else { - // Handle the 'IN (query)' form. - rightType = validator.deriveType(scope, right); + List rightTypeList = new ArrayList<>(); + for (int i = 1; i < right.size(); i++) { + SqlNode node = right.get(i); + RelDataType nodeType = validator.deriveType(scope, node); + rightTypeList.add(nodeType); } + rightType = typeFactory.leastRestrictive(rightTypeList); + + // First check that the expressions in the IN list are compatible + // with each other. Same rules as the VALUES operator (per + // SQL:2003 Part 2 Section 8.4, ). + if (null == rightType && validator.isTypeCoercionEnabled()) { + // Do implicit type cast if it is allowed to. + rightType = validator.getTypeCoercion().getWiderTypeFor(rightTypeList, true); + } + if (null == rightType) { + throw validator.newValidationError(right, RESOURCE.incompatibleTypesInList()); + } + + // Record the RHS type for use by SqlToRelConverter. + ((SqlValidatorImpl) validator).setValidatedNodeType(right, rightType); + SqlCallBinding callBinding = new SqlCallBinding(validator, scope, call); // Coerce type first. if (callBinding.getValidator().isTypeCoercionEnabled()) { From 732470b02dfd098da3369af1af16592ebe640a33 Mon Sep 17 00:00:00 2001 From: Walaa Eldin Moustafa Date: Fri, 19 May 2023 14:24:49 -0700 Subject: [PATCH 2/4] Fix comment --- .../coral/hive/hive2rel/functions/HiveInOperator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java index ba6b5f4ee..d401fc475 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java @@ -48,8 +48,8 @@ * different from set of OR predicates * 2. In some cases, calcite turns IN clause to INNER JOIN query on a set of values. We * again want to prevent this conversion. - * Unlike Calcite's IN operator, whose operands are (expr, exprList), this operator's operands - * are (expr, exprList[0], expr[1], ...), where (exp) is the expression preceding IN operator. + * Unlike Calcite's IN operator, whose operands are (expr, values), this operator's operands + * are (expr, values[0], values[1], ...), where (exp) is the expression preceding IN operator. */ public class HiveInOperator extends SqlSpecialOperator { From 54ca26a235eaae5407ff984b346124b4a4abea51 Mon Sep 17 00:00:00 2001 From: Walaa Eldin Moustafa Date: Fri, 19 May 2023 14:38:09 -0700 Subject: [PATCH 3/4] Add Spark test case --- .../test/java/com/linkedin/coral/spark/CoralSparkTest.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java index 29e91e76a..c172e33a5 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java @@ -689,6 +689,13 @@ public void testAliasHaving() { assertEquals(CoralSpark.create(relNode).getSparkSql(), targetSql); } + @Test + public void testInOperator() { + RelNode relNode = TestUtils.toRelNode("SELECT a from foo where b in ('v1', 'v2')"); + String targetSql = "SELECT foo.a\n" + "FROM default.foo foo\n" + "WHERE foo.b IN ('v1', 'v2')"; + assertEquals(CoralSpark.create(relNode).getSparkSql(), targetSql); + } + @Test public void testCastDecimal() { RelNode relNode = TestUtils.toRelNode("SELECT CAST(a as DECIMAL(6, 2)) as casted_decimal FROM default.foo"); From b1ab6118b016b780f02f770cc0851de4d3f23076 Mon Sep 17 00:00:00 2001 From: Walaa Eldin Moustafa Date: Fri, 19 May 2023 14:49:15 -0700 Subject: [PATCH 4/4] Fix loop start index --- .../linkedin/coral/hive/hive2rel/functions/HiveInOperator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java index d401fc475..7ffec8a5f 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/HiveInOperator.java @@ -91,7 +91,7 @@ public RelDataType deriveType(SqlValidator validator, SqlValidatorScope scope, S // Derive type for RHS. List rightTypeList = new ArrayList<>(); - for (int i = 1; i < right.size(); i++) { + for (int i = 0; i < right.size(); i++) { SqlNode node = right.get(i); RelDataType nodeType = validator.deriveType(scope, node); rightTypeList.add(nodeType);