Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Coral-Hive] Simplify IN operator #410

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2022 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -8,24 +8,19 @@
import java.util.ArrayList;
import java.util.List;

import com.google.common.base.Preconditions;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDataTypeSpec;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql2rel.ReflectiveConvertletTable;
import org.apache.calcite.sql2rel.SqlRexContext;
import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.calcite.sql2rel.StandardConvertletTable;

import com.linkedin.coral.com.google.common.collect.ImmutableList;
import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator;
import com.linkedin.coral.hive.hive2rel.functions.HiveInOperator;
import com.linkedin.coral.hive.hive2rel.functions.HiveNamedStructFunction;


Expand All @@ -46,21 +41,6 @@ public RexNode convertNamedStruct(SqlRexContext cx, HiveNamedStructFunction func
return cx.getRexBuilder().makeCast(retType, rowNode);
}

@SuppressWarnings("unused")
public RexNode convertHiveInOperator(SqlRexContext cx, HiveInOperator operator, SqlCall call) {
List<SqlNode> operandList = call.getOperandList();
Preconditions.checkState(operandList.size() == 2 && operandList.get(1) instanceof SqlNodeList);
RexNode lhs = cx.convertExpression(operandList.get(0));
SqlNodeList rhsNodes = (SqlNodeList) operandList.get(1);
ImmutableList.Builder<RexNode> rexNodes = ImmutableList.<RexNode> builder().add(lhs);
for (int i = 0; i < rhsNodes.size(); i++) {
rexNodes.add(cx.convertExpression(rhsNodes.get(i)));
}

RelDataType retType = cx.getValidator().getValidatedNodeType(call);
return cx.getRexBuilder().makeCall(retType, HiveInOperator.IN, rexNodes.build());
}

@SuppressWarnings("unused")
public RexNode convertFunctionFieldReferenceOperator(SqlRexContext cx, FunctionFieldReferenceOperator op,
SqlCall call) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2022 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -96,18 +96,14 @@ public SqlCall createCall(SqlNode function, List<SqlNode> operands, SqlLiteral q
@Override
public SqlCall createCall(SqlNode function, List<SqlNode> operands, SqlLiteral qualifier) {
checkState(operands.size() >= 2);
SqlNode lhs = operands.get(0);
SqlNode firstRhs = operands.get(1);
if (firstRhs instanceof SqlSelect) {
if (operands.get(1) instanceof SqlSelect) {
// for IN subquery use Calcite IN operator. Calcite IN operator
// will turn it into inner join, which not ideal but that's better
// tested.
return SqlStdOperatorTable.IN.createCall(ZERO, operands);
} else {
// column IN values () clause
List<SqlNode> rhsList = operands.subList(1, operands.size());
SqlNodeList rhs = new SqlNodeList(rhsList, ZERO);
return getSqlOperator().createCall(ZERO, lhs, rhs);
// For IN whose operand is a list of values, we use custom IN operator {@link HiveInOperator}.
return getSqlOperator().createCall(ZERO, operands);
}
}
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2021 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -34,6 +34,7 @@
import org.apache.calcite.sql.validate.SqlValidatorImpl;
import org.apache.calcite.sql.validate.SqlValidatorScope;

import static org.apache.calcite.sql.parser.SqlParserPos.ZERO;
import static org.apache.calcite.util.Static.*;


Expand All @@ -47,6 +48,8 @@
* different from set of OR predicates
* 2. In some cases, calcite turns IN clause to INNER JOIN query on a set of values. We
* again want to prevent this conversion.
* Unlike Calcite's IN operator, whose operands are (expr, values), this operator's operands
* are (expr, values[0], values[1], ...), where (exp) is the expression preceding IN operator.
*/
public class HiveInOperator extends SqlSpecialOperator {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As part of refactoring this operator, can you also rename this operator to CoralINOperator?


Expand All @@ -73,46 +76,42 @@ public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec)
writer.endList(listFrame);
}

// copied from Calcite' SqlInOperator
// Adapts the parent's behavior to allow IN to have any number of arguments (i.e., list elements).
public RelDataType deriveType(SqlValidator validator, SqlValidatorScope scope, SqlCall call) {
final List<SqlNode> operands = call.getOperandList();
assert operands.size() == 2;
final SqlNode left = operands.get(0);
final SqlNode right = operands.get(1);
final SqlNodeList right = new SqlNodeList(ZERO);
for (int i = 1; i < operands.size(); i++) {
right.add(operands.get(i));
}

final RelDataTypeFactory typeFactory = validator.getTypeFactory();
RelDataType leftType = validator.deriveType(scope, left);
RelDataType rightType;

// Derive type for RHS.
if (right instanceof SqlNodeList) {
// Handle the 'IN (expr, ...)' form.
List<RelDataType> rightTypeList = new ArrayList<>();
SqlNodeList nodeList = (SqlNodeList) right;
for (int i = 0; i < nodeList.size(); i++) {
SqlNode node = nodeList.get(i);
RelDataType nodeType = validator.deriveType(scope, node);
rightTypeList.add(nodeType);
}
rightType = typeFactory.leastRestrictive(rightTypeList);

// First check that the expressions in the IN list are compatible
// with each other. Same rules as the VALUES operator (per
// SQL:2003 Part 2 Section 8.4, <in predicate>).
if (null == rightType && validator.isTypeCoercionEnabled()) {
// Do implicit type cast if it is allowed to.
rightType = validator.getTypeCoercion().getWiderTypeFor(rightTypeList, true);
}
if (null == rightType) {
throw validator.newValidationError(right, RESOURCE.incompatibleTypesInList());
}

// Record the RHS type for use by SqlToRelConverter.
((SqlValidatorImpl) validator).setValidatedNodeType(nodeList, rightType);
} else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the else part not required anymore because for IN (query) type SQLs, we use Calcite's IN operator here?

Could you also add a UT / point me to a UT which validates that type derivation won't fail for those SQLs due to this change?

Copy link
Contributor

@KevinGe00 KevinGe00 Sep 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right is now explicitly built as a SqlNodeList using the list of operands, so there is no case where right isn't an instance of SqlNodeList.

https://github.com/linkedin/coral/pull/410/files#diff-0e69daa34dc3fe4e131718bf050d0cfd47dde9e79921e53e6ccda1d78aeefe17R83-R86

// Handle the 'IN (query)' form.
rightType = validator.deriveType(scope, right);
List<RelDataType> rightTypeList = new ArrayList<>();
for (int i = 0; i < right.size(); i++) {
SqlNode node = right.get(i);
RelDataType nodeType = validator.deriveType(scope, node);
rightTypeList.add(nodeType);
}
rightType = typeFactory.leastRestrictive(rightTypeList);

// First check that the expressions in the IN list are compatible
// with each other. Same rules as the VALUES operator (per
// SQL:2003 Part 2 Section 8.4, <in predicate>).
if (null == rightType && validator.isTypeCoercionEnabled()) {
// Do implicit type cast if it is allowed to.
rightType = validator.getTypeCoercion().getWiderTypeFor(rightTypeList, true);
}
if (null == rightType) {
throw validator.newValidationError(right, RESOURCE.incompatibleTypesInList());
}

// Record the RHS type for use by SqlToRelConverter.
((SqlValidatorImpl) validator).setValidatedNodeType(right, rightType);

SqlCallBinding callBinding = new SqlCallBinding(validator, scope, call);
// Coerce type first.
if (callBinding.getValidator().isTypeCoercionEnabled()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,13 @@ public void testAliasHaving() {
assertEquals(CoralSpark.create(relNode).getSparkSql(), targetSql);
}

@Test
public void testInOperator() {
RelNode relNode = TestUtils.toRelNode("SELECT a from foo where b in ('v1', 'v2')");
String targetSql = "SELECT foo.a\n" + "FROM default.foo foo\n" + "WHERE foo.b IN ('v1', 'v2')";
assertEquals(CoralSpark.create(relNode).getSparkSql(), targetSql);
}

@Test
public void testCastDecimal() {
RelNode relNode = TestUtils.toRelNode("SELECT CAST(a as DECIMAL(6, 2)) as casted_decimal FROM default.foo");
Expand Down