From 70ecd884f6a95eacc6faeecbd87b5888f706ac72 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Tue, 25 Oct 2022 14:11:29 +0530 Subject: [PATCH 01/11] Adding SqlCreateTable SQL node --- .../common/calcite/sql/SqlCreateTable.java | 59 +++++++++++++++++++ .../parsetree/AbstractASTVisitor.java | 40 +++++++++++++ .../hive2rel/parsetree/ParseTreeBuilder.java | 45 ++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java new file mode 100644 index 000000000..5719dfac8 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -0,0 +1,59 @@ +package com.linkedin.coral.common.calcite.sql; + +import com.linkedin.coral.javax.annotation.Nullable; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.*; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.util.ImmutableNullableList; + +import java.util.List; +import java.util.Objects; + +public class SqlCreateTable extends SqlCreate { + public final SqlIdentifier name; + public final @Nullable SqlNodeList columnList; + public final @Nullable SqlNode query; + + private static final SqlOperator OPERATOR = + new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); + + /** Creates a SqlCreateTable. */ + public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, + SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode query) { + super(OPERATOR, pos, replace, ifNotExists); + this.name = Objects.requireNonNull(name, "name"); + this.columnList = columnList; // may be null + this.query = query; // for "CREATE TABLE ... 
AS query"; may be null + } + + @SuppressWarnings("nullness") + @Override public List getOperandList() { + return ImmutableNullableList.of(name, columnList, query); + } + + @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + writer.keyword("CREATE"); + writer.keyword("TABLE"); + if (ifNotExists) { + writer.keyword("IF NOT EXISTS"); + } + name.unparse(writer, leftPrec, rightPrec); + if (columnList != null) { + SqlWriter.Frame frame = writer.startList("(", ")"); + for (SqlNode c : columnList) { + writer.sep(","); + c.unparse(writer, 0, 0); + } + writer.endList(frame); + } + if (query != null) { + writer.keyword("AS"); + writer.newlineAndIndent(); + query.unparse(writer, 0, 0); + } + } + + public SqlNode getSelectQuery() { + return query; + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java index 8f71bf0cb..b655658dc 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java @@ -302,12 +302,52 @@ protected R visit(ASTNode node, C ctx) { case HiveParser.KW_CURRENT: return visitCurrentRow(node, ctx); + case HiveParser.TOK_CREATETABLE: + return visitCreateTable(node, ctx); + case HiveParser.TOK_LIKETABLE: + return visitLikeTable(node, ctx); + case HiveParser.TOK_IFNOTEXISTS: + return visitIfNotExists(node, ctx); + case HiveParser.TOK_TABCOLLIST: + return visitColumnList(node, ctx); + case HiveParser.TOK_TABCOL: + return visitColumn(node, ctx); + default: // return visitChildren(node, ctx); throw new UnhandledASTTokenException(node); } } + protected R visitColumn(ASTNode node, C ctx) { + if(node.getChildren() != null){ + return visitChildren(node, ctx).get(0); + } + return null; + } + + protected R visitColumnList(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitIfNotExists(ASTNode node, C ctx) { + if(node.getChildren() != null){ + return visitChildren(node, ctx).get(0); + } + return null; + } + + protected R visitLikeTable(ASTNode node, C ctx){ + if(node.getChildren() != null){ + return visitChildren(node, ctx).get(0); + } + return null; + } + + protected R visitCreateTable(ASTNode node, C ctx){ + return visitChildren(node, ctx).get(0); + } + protected R visitKeywordLiteral(ASTNode node, C ctx) { return visitChildren(node, ctx).get(0); } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index 2eda31f4e..3a8c67400 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -13,6 +13,7 @@ import javax.annotation.Nullable; +import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.sql.JoinConditionType; import org.apache.calcite.sql.JoinType; @@ -39,6 +40,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.metastore.api.Table; import com.linkedin.coral.com.google.common.collect.ImmutableList; @@ -608,6 
+610,42 @@ protected SqlNode visitSelect(ASTNode node, ParseContext ctx) { return ctx.selects; } + @Override + protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx){ + CreateTableOptions ctOptions = new CreateTableOptions(); + for(Node child: node.getChildren()){ + ASTNode ast = (ASTNode) child; + switch (ast.getType()){ + case HiveParser.TOK_TABNAME: + ctOptions.name = (SqlIdentifier) visitTabnameNode(node, ctx); + case HiveParser.TOK_IFNOTEXISTS: + ctOptions.ifNotExists = true; + case HiveParser.TOK_TABCOLLIST: + ctOptions.columnList = (SqlNodeList) visitColumnList(node, ctx); + case HiveParser.TOK_QUERY: + ctOptions.query = visitQueryNode(node, ctx); + default: + visit(node, ctx); + } + } + return new SqlCreateTable(ZERO, false, ctOptions.ifNotExists, ctOptions.name, ctOptions.columnList, ctOptions.query); + } + + @Override + protected SqlNode visitColumnList(ASTNode node, ParseContext ctx){ + List sqlNodeList = visitChildren(node, ctx); + return new SqlNodeList(sqlNodeList, ZERO); + } + + @Override + protected SqlNode visitColumn(ASTNode node, ParseContext ctx) { + return visitChildren(node, ctx).get(0); + } + + protected SqlNode visitIfNotExists(ASTNode node, ParseContext ctx) { + return SqlLiteral.createBoolean(true, ZERO); + } + @Override protected SqlNode visitTabRefNode(ASTNode node, ParseContext ctx) { List sqlNodes = visitChildren(node, ctx); @@ -1059,4 +1097,11 @@ Optional getHiveTable() { return hiveTable; } } + + class CreateTableOptions { + SqlIdentifier name; + SqlNodeList columnList; + SqlNode query; + Boolean ifNotExists; + } } From 276ff154db350d0f5b1419dcaf700b87202b628c Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Tue, 25 Oct 2022 17:27:35 +0530 Subject: [PATCH 02/11] Refactor code to support create table --- .../linkedin/coral/common/ToRelConverter.java | 15 +++++++++------ .../coral/common/calcite/sql/SqlCommand.java | 10 ++++++++++ .../common/calcite/sql/SqlCreateTable.java | 6 +++++- .../hive2rel/parsetree/ParseTreeBuilder.java | 14 +++++++++----- .../coralservice/utils/TranslationUtils.java | 10 +++++++--- .../com/linkedin/coral/spark/CoralSpark.java | 19 +++++++++++++++++++ 6 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index 69fa751aa..b434c2811 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -13,6 +13,7 @@ import com.google.common.collect.ImmutableList; +import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionConfigImpl; import org.apache.calcite.config.CalciteConnectionProperty; @@ -118,7 +119,6 @@ protected ToRelConverter(Map>> localMetaStore) public RelNode convertSql(String sql) { return toRel(toSqlNode(sql)); } - /** * Similar to {@link #convertSql(String)} but converts hive view definition stored * in the hive metastore to corresponding {@link RelNode} implementation.
@@ -133,8 +133,7 @@ public RelNode convertView(String hiveDbName, String hiveViewName) { return toRel(sqlNode); } - // TODO change back to protected once the relevant tests move to the common package - @VisibleForTesting +// @VisibleForTesting public SqlNode toSqlNode(String sql) { return toSqlNode(sql, null); } @@ -161,10 +160,14 @@ public SqlNode processView(String dbName, String tableName) { return toSqlNode(stringViewExpandedText, table); } - @VisibleForTesting - protected RelNode toRel(SqlNode sqlNode) { +// @VisibleForTesting + public RelNode toRel(SqlNode sqlNode) { + if(sqlNode instanceof SqlCreateTable){ + sqlNode = ((SqlCreateTable) sqlNode).getSelectQuery(); + } RelRoot root = getSqlToRelConverter().convertQuery(sqlNode, true, true); - return standardizeRel(root.rel); + RelNode relNode = standardizeRel(root.rel); + return relNode; } /** diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java new file mode 100644 index 000000000..d45ed1c68 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java @@ -0,0 +1,10 @@ +package com.linkedin.coral.common.calcite.sql; + +import org.apache.calcite.sql.SqlNode; + +public interface SqlCommand { + + public SqlNode getSelectQuery(); + + public void setSelectQuery(SqlNode selectQuery); +} \ No newline at end of file diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java index 5719dfac8..32323513b 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -12,7 +12,7 @@ public class SqlCreateTable extends SqlCreate { public final SqlIdentifier name; public final @Nullable SqlNodeList columnList; - public final @Nullable SqlNode query; + public @Nullable SqlNode query; private static final SqlOperator OPERATOR = new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); @@ -56,4 +56,8 @@ public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, public SqlNode getSelectQuery() { return query; } + + public void setQuery(SqlNode query) { + this.query = query; + } } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index 3a8c67400..0dfd6a38c 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -617,18 +617,22 @@ protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx){ ASTNode ast = (ASTNode) child; switch (ast.getType()){ case HiveParser.TOK_TABNAME: - ctOptions.name = (SqlIdentifier) visitTabnameNode(node, ctx); + ctOptions.name = (SqlIdentifier) visitTabnameNode(ast, ctx); + break; case HiveParser.TOK_IFNOTEXISTS: ctOptions.ifNotExists = true; + break; case HiveParser.TOK_TABCOLLIST: - ctOptions.columnList = (SqlNodeList) visitColumnList(node, ctx); + ctOptions.columnList = (SqlNodeList) visitColumnList(ast, ctx); + break; case HiveParser.TOK_QUERY: - ctOptions.query = visitQueryNode(node, ctx); + ctOptions.query = visitQueryNode(ast, ctx); + break; default: - visit(node, ctx); + break; } } - return new 
SqlCreateTable(ZERO, false, ctOptions.ifNotExists != null ? ctOptions.ifNotExists : false, ctOptions.name, ctOptions.columnList, ctOptions.query); } @Override diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index 1828bcd25..be74fdc09 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -9,8 +9,12 @@ import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; import com.linkedin.coral.spark.CoralSpark; +<<<<<<< HEAD import com.linkedin.coral.trino.rel2trino.RelToTrinoConverter; import com.linkedin.coral.trino.trino2rel.TrinoToRelConverter; +======= +import org.apache.calcite.sql.SqlNode; +>>>>>>> 32f952c (Refactor code to support create table) import static com.linkedin.coral.coralservice.utils.CoralProvider.*; @@ -29,8 +33,8 @@ public static String translateHiveToTrino(String query) { } public static String translateHiveToSpark(String query) { - RelNode relNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); - CoralSpark coralSpark = CoralSpark.create(relNode); - return coralSpark.getSparkSql(); + SqlNode sqlNode = hiveToRelConverter.toSqlNode(query); + SqlNode coralSqlNode = CoralSpark.convertRelNodeToCoralSqlNode(hiveToRelConverter.toRel(sqlNode), sqlNode); + return CoralSpark.convert(coralSqlNode); } } diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index 056edb535..79a7ba6ab 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.stream.Collectors; +import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.avro.Schema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlSelect; @@ -70,6 +71,24 @@ public static CoralSpark create(RelNode irRelNode) { return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); } + public static String convert(SqlNode coralSqlNode){ + SqlNode sparkSqlNode = coralSqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()); + SqlNode rewrittenSparkSqlNode = sparkSqlNode.accept(new SparkSqlRewriter()); + return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); + } + + public static SqlNode convertRelNodeToCoralSqlNode(RelNode relNode, SqlNode sqlNode){ + RelNode sparkRelNode = IRRelToSparkRelTransformer.transform(relNode).getSparkRelNode(); + CoralRelToSqlNodeConverter rel2sql = new CoralRelToSqlNodeConverter(); + SqlNode transformedSqlNode = rel2sql.convert(sparkRelNode); + if(sqlNode instanceof SqlCreateTable){ + ((SqlCreateTable) sqlNode).setQuery(transformedSqlNode); + return sqlNode; + } + return transformedSqlNode; + } + + + /** * Users use this function as the main API for getting CoralSpark instance. 
* This should be used when user need to align the Coral-spark translated SQL From 75e84789a2194e727d6b30ff976cfd56ec3d67eb Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Fri, 28 Oct 2022 22:18:19 +0530 Subject: [PATCH 03/11] Adding translation logic to Coral Spark and adding node of interest logic --- .../linkedin/coral/common/ToRelConverter.java | 6 -- .../common/calcite/sql/SqlCreateTable.java | 7 +-- .../controller/TranslationController.java | 13 ++++ .../coralservice/utils/TranslationUtils.java | 15 +++-- .../com/linkedin/coral/spark/CoralSpark.java | 59 +++++++++++++------ 5 files changed, 66 insertions(+), 34 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index b434c2811..8cb0c2855 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -13,7 +13,6 @@ import com.google.common.collect.ImmutableList; -import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionConfigImpl; import org.apache.calcite.config.CalciteConnectionProperty; @@ -133,7 +132,6 @@ public RelNode convertView(String hiveDbName, String hiveViewName) { return toRel(sqlNode); } -// @VisibleForTesting public SqlNode toSqlNode(String sql) { return toSqlNode(sql, null); } @@ -160,11 +158,7 @@ public SqlNode processView(String dbName, String tableName) { return toSqlNode(stringViewExpandedText, table); } -// @VisibleForTesting public RelNode toRel(SqlNode sqlNode) { RelRoot root = getSqlToRelConverter().convertQuery(sqlNode, true, true); RelNode relNode = standardizeRel(root.rel); return relNode; } /** diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java index 32323513b..fba9e6bc4 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -1,7 +1,6 @@ package com.linkedin.coral.common.calcite.sql; import com.linkedin.coral.javax.annotation.Nullable; -import org.apache.calcite.rel.RelNode; import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.util.ImmutableNullableList; @@ -10,9 +9,9 @@ import java.util.Objects; public class SqlCreateTable extends SqlCreate { - public final SqlIdentifier name; - public final @Nullable SqlNodeList columnList; - public @Nullable SqlNode query; + private final SqlIdentifier name; + private final @Nullable SqlNodeList columnList; + private @Nullable SqlNode query; private static final SqlOperator OPERATOR = new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java index 6d0aa67b3..3ccb38ac0 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java @@ -65,6 +65,7 @@ public 
ResponseEntity translate(@RequestBody TranslateRequestBody translateReque translatedSql = translateTrinoToSpark(query); } } +<<<<<<< HEAD // From Hive else if (fromLanguage.equalsIgnoreCase("hive")) { // To Spark @@ -75,6 +76,18 @@ else if (fromLanguage.equalsIgnoreCase("hive")) { else if (toLanguage.equalsIgnoreCase("trino")) { translatedSql = translateHiveToTrino(query); } +======= + } + // From Hive + else if (fromLanguage.equalsIgnoreCase("hive")) { + // To Spark + if (toLanguage.equalsIgnoreCase("spark")) { + translatedSql = translateHiveQueryToSparkSql(query); + } + // To Trino + else if (toLanguage.equalsIgnoreCase("trino")) { + translatedSql = translateHiveToTrino(query); +>>>>>>> c5df021 (Adding translation logic to Coral Spark and adding node of interest logic) } } catch (Throwable t) { t.printStackTrace(); diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index be74fdc09..ee9c4874f 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -9,12 +9,11 @@ import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; import com.linkedin.coral.spark.CoralSpark; -<<<<<<< HEAD import com.linkedin.coral.trino.rel2trino.RelToTrinoConverter; import com.linkedin.coral.trino.trino2rel.TrinoToRelConverter; -======= import org.apache.calcite.sql.SqlNode; ->>>>>>> 32f952c (Refactor code to support create table) + +import java.util.function.Function; import static com.linkedin.coral.coralservice.utils.CoralProvider.*; @@ -33,8 +32,14 @@ public static String translateHiveToTrino(String query) { } public static String translateHiveToSpark(String query) { + RelNode relNode = hiveToRelConverter.convertSql(query); + CoralSpark coralSpark = CoralSpark.create(relNode); + return coralSpark.getSparkSql(); + } + + public static String translateHiveQueryToSparkSql(String query){ SqlNode sqlNode = hiveToRelConverter.toSqlNode(query); - SqlNode coralSqlNode = CoralSpark.convertRelNodeToCoralSqlNode(hiveToRelConverter.toRel(sqlNode), sqlNode); - return CoralSpark.convert(coralSqlNode); + Function hiveSqlNodeToRelConverter = hiveToRelConverter::toRel; + return CoralSpark.create(sqlNode, hiveSqlNodeToRelConverter).getSparkSql(); } } diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index 79a7ba6ab..1f4763131 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -6,6 +6,7 @@ package com.linkedin.coral.spark; import java.util.List; +import java.util.function.Function; import java.util.stream.Collectors; import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.avro.Schema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.SqlCreate; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlSelect; @@ -70,25 +72,7 @@ public static CoralSpark create(RelNode irRelNode) { List sparkUDFInfos = sparkRelInfo.getSparkUDFInfoList(); return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); } - - public static String convert(SqlNode coralSqlNode){ - SqlNode sparkSqlNode = 
coralSqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()); - SqlNode rewrittenSparkSqlNode = sparkSqlNode.accept(new SparkSqlRewriter()); - return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); - } - - public static SqlNode convertRelNodeToCoralSqlNode(RelNode relNode, SqlNode sqlNode){ - RelNode sparkRelNode = IRRelToSparkRelTransformer.transform(relNode).getSparkRelNode(); - CoralRelToSqlNodeConverter rel2sql = new CoralRelToSqlNodeConverter(); - SqlNode transformedSqlNode = rel2sql.convert(sparkRelNode); - if(sqlNode instanceof SqlCreateTable){ - ((SqlCreateTable) sqlNode).setQuery(transformedSqlNode); - return sqlNode; - } - return transformedSqlNode; - } - - + /** * Users use this function as the main API for getting CoralSpark instance. * This should be used when user need to align the Coral-spark translated SQL @@ -134,6 +118,43 @@ private static String constructSparkSQL(RelNode sparkRelNode) { return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); } + /** + * Users use this function as the main API for getting CoralSpark instance. + * + * Internally, appropriate parts of the SqlNode are converted to a Spark RelNode, the Spark RelNode is converted back + * to a SqlNode, and the SqlNode to SparkSQL. + * + * It returns an instance of CoralSpark which contains + * 1) Spark SQL + * 2) Base tables + * 3) Spark UDF information objects, i.e. List of {@link SparkUDFInfo} + * + * @param sqlNode Coral SqlNode which will be translated to SparkSQL. + * @param convertor Functional interface to convert a SqlNode to the corresponding RelNode + * + * + * @return {@link CoralSpark} instance + */ + public static CoralSpark create(SqlNode sqlNode, Function convertor){ + SparkRelInfo sparkRelInfo; + // apply RelNode transformations for the parts of sqlNode eligible for transformation.
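+ // A CREATE TABLE ... AS SELECT statement is not itself a query expression, so only its embedded + // SELECT is converted to a RelNode and transformed; the transformed SELECT is then re-attached + // to the CREATE TABLE wrapper below.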
+ if(sqlNode instanceof SqlCreate) { + SqlNode selectNode = ((SqlCreateTable) sqlNode).getSelectQuery(); + sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(selectNode)); + selectNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); + ((SqlCreateTable) sqlNode).setQuery(selectNode); + } else { + sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(sqlNode)); + sqlNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); + } + // sqlNode to sparkSQL + String sparkSQL = sqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()) + .accept(new SparkSqlRewriter()).toSqlString(SparkSqlDialect.INSTANCE).getSql(); + List baseTables = constructBaseTables(sparkRelInfo.getSparkRelNode()); + List sparkUDFInfos = sparkRelInfo.getSparkUDFInfoList(); + return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); + } + private static String constructSparkSQLWithExplicitAlias(RelNode sparkRelNode, List aliases) { CoralRelToSqlNodeConverter rel2sql = new CoralRelToSqlNodeConverter(); // Create temporary objects r and rewritten to make debugging easier From b8577853dcc1425de18256db7d27a54ef0f999c2 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Sat, 29 Oct 2022 21:49:03 +0530 Subject: [PATCH 04/11] merge master --- .../com/linkedin/coral/coralservice/utils/TranslationUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index ee9c4874f..da720eea1 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -38,6 +38,7 @@ public static String translateHiveToSpark(String query) { } public static String translateHiveQueryToSparkSql(String query){ + HiveToRelConverter hiveToRelConverter = new HiveToRelConverter(hiveMetastoreClient); SqlNode sqlNode = hiveToRelConverter.toSqlNode(query); Function hiveSqlNodeToRelConverter = hiveToRelConverter::toRel; return CoralSpark.create(sqlNode, hiveSqlNodeToRelConverter).getSparkSql(); From 3ae48d387d3ebd7efed18e8b7b03639d57a4b8a3 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Mon, 31 Oct 2022 16:33:49 +0530 Subject: [PATCH 05/11] adding more fields to support different variations of create table as select syntax --- .../common/calcite/sql/SqlCreateTable.java | 33 +++- .../parsetree/AbstractASTVisitor.java | 43 ++++- .../hive2rel/parsetree/ParseTreeBuilder.java | 162 ++++++++++-------- 3 files changed, 168 insertions(+), 70 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java index fba9e6bc4..846d6e9f6 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -12,17 +12,23 @@ public class SqlCreateTable extends SqlCreate { private final SqlIdentifier name; private final @Nullable SqlNodeList columnList; private @Nullable SqlNode query; + private final @Nullable SqlNode tableSerializer; + private final @Nullable SqlNodeList tableFileFormat; + private final @Nullable SqlCharStringLiteral tableRowFormat; private static final SqlOperator OPERATOR = new SqlSpecialOperator("CREATE TABLE", 
SqlKind.CREATE_TABLE); /** Creates a SqlCreateTable. */ public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, - SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode query) { + SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode query, SqlNode tableSerializer, SqlNodeList tableFileFormat, SqlCharStringLiteral tableRowFormat) { super(OPERATOR, pos, replace, ifNotExists); this.name = Objects.requireNonNull(name, "name"); - this.columnList = columnList; // may be null + this.columnList = columnList; // may be null, like in case of ctas this.query = query; // for "CREATE TABLE ... AS query"; may be null + this.tableSerializer = tableSerializer; + this.tableFileFormat = tableFileFormat; + this.tableRowFormat = tableRowFormat; } @SuppressWarnings("nullness") @@ -45,6 +51,29 @@ public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, } writer.endList(frame); } + if(tableSerializer != null){ + writer.keyword("ROW FORMAT SERDE"); + tableSerializer.unparse(writer, 0, 0); + writer.newlineAndIndent(); + } + if(tableRowFormat != null){ + writer.keyword("ROW FORMAT DELIMITED FIELDS TERMINATED BY"); + tableRowFormat.unparse(writer, 0, 0); + writer.newlineAndIndent(); + } + if(tableFileFormat != null){ + if(tableFileFormat.size() == 1){ + writer.keyword("STORED AS"); + tableFileFormat.get(0).unparse(writer, 0, 0); + writer.newlineAndIndent(); + } else { + writer.keyword("STORED AS INPUTFORMAT"); + tableFileFormat.get(0).unparse(writer, 0, 0); + writer.keyword("OUTPUTFORMAT"); + tableFileFormat.get(1).unparse(writer, 0, 0); + writer.newlineAndIndent(); + } + } if (query != null) { writer.keyword("AS"); writer.newlineAndIndent(); diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java index b655658dc..b420c07e0 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java @@ -312,13 +312,54 @@ protected R visit(ASTNode node, C ctx) { return visitColumnList(node, ctx); case HiveParser.TOK_TABCOL: return visitColumn(node, ctx); - + case HiveParser.TOK_FILEFORMAT_GENERIC: + return visitFileFormatGeneric(node, ctx); + case HiveParser.TOK_TABLEFILEFORMAT: + return visitTableFileFormat(node, ctx); + case HiveParser.TOK_TABLESERIALIZER: + return visitTableSerializer(node, ctx); + case HiveParser.TOK_SERDENAME: + return visitSerdeName(node, ctx); + case HiveParser.TOK_TABLEROWFORMAT: + return visitTableRowFormat(node, ctx); + case HiveParser.TOK_SERDEPROPS: + return visitSerdeProps(node, ctx); + case HiveParser.TOK_TABLEROWFORMATFIELD: + return visitTableRowFormatField(node, ctx); default: // return visitChildren(node, ctx); throw new UnhandledASTTokenException(node); } } + protected R visitTableRowFormatField(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitSerdeProps(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitTableRowFormat(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitSerdeName(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitTableSerializer(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + + protected R visitTableFileFormat(ASTNode node, C ctx) { + return 
visitChildren(node, ctx).get(0); + } + + protected R visitFileFormatGeneric(ASTNode node, C ctx) { + return visitChildren(node, ctx).get(0); + } + protected R visitColumn(ASTNode node, C ctx) { if(node.getChildren() != null){ return visitChildren(node, ctx).get(0); diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index 0dfd6a38c..84596f427 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -5,42 +5,17 @@ */ package com.linkedin.coral.hive.hive2rel.parsetree; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Optional; +import java.util.*; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.avatica.util.TimeUnit; -import org.apache.calcite.sql.JoinConditionType; -import org.apache.calcite.sql.JoinType; -import org.apache.calcite.sql.SqlAsOperator; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlBasicTypeNameSpec; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlDataTypeSpec; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlIntervalQualifier; -import org.apache.calcite.sql.SqlJoin; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlLateralOperator; -import org.apache.calcite.sql.SqlLiteral; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlNodeList; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.SqlSelect; -import org.apache.calcite.sql.SqlSelectKeyword; -import org.apache.calcite.sql.SqlTypeNameSpec; -import org.apache.calcite.sql.SqlWindow; -import org.apache.calcite.sql.SqlWith; -import org.apache.calcite.sql.SqlWithItem; +import org.apache.calcite.sql.*; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.metastore.api.Table; import com.linkedin.coral.com.google.common.collect.ImmutableList; @@ -185,10 +160,10 @@ private SqlNode visitLateralViewInternal(ASTNode node, ParseContext ctx, boolean */ private SqlNode visitLateralViewUDTF(List sqlNodes, List aliasOperands, SqlCall tableFunctionCall) { SqlNode lateralCall = SqlStdOperatorTable.LATERAL.createCall(ZERO, - new SqlLateralOperator(SqlKind.COLLECTION_TABLE).createCall(ZERO, tableFunctionCall)); + new SqlLateralOperator(SqlKind.COLLECTION_TABLE).createCall(ZERO, tableFunctionCall)); final String functionName = tableFunctionCall.getOperator().getName(); ImmutableList fieldNames = - StaticHiveFunctionRegistry.UDTF_RETURN_FIELD_NAME_MAP.getOrDefault(functionName, null); + StaticHiveFunctionRegistry.UDTF_RETURN_FIELD_NAME_MAP.getOrDefault(functionName, null); if (fieldNames == null) { throw new RuntimeException("User defined table function " + functionName + " is not registered."); } @@ -198,11 +173,11 @@ private SqlNode visitLateralViewUDTF(List sqlNodes, List alias fieldNames.forEach(name -> asOperands.add(new SqlIdentifier(name, ZERO))); SqlCall aliasCall = SqlStdOperatorTable.AS.createCall(ZERO, asOperands); return new SqlJoin(ZERO, sqlNodes.get(1), 
SqlLiteral.createBoolean(false, ZERO), JoinType.COMMA.symbol(ZERO), - aliasCall/*lateralCall*/, JoinConditionType.NONE.symbol(ZERO), null); + aliasCall/*lateralCall*/, JoinConditionType.NONE.symbol(ZERO), null); } private SqlNode visitLateralViewExplode(List sqlNodes, List aliasOperands, - SqlCall tableFunctionCall, boolean isOuter) { + SqlCall tableFunctionCall, boolean isOuter) { final int operandCount = aliasOperands.size(); // explode array if operandCount == 3: LATERAL VIEW EXPLODE(op0) op1 AS op2 // explode map if operandCount == 4: LATERAL VIEW EXPLODE(op0) op1 AS op2, op3 @@ -214,7 +189,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al // Note that `operandCount == 2 && isOuter` is not supported yet due to the lack of type information needed // to derive the correct IF function parameters. checkState(operandCount == 2 || operandCount == 3 || operandCount == 4, - format("Unsupported LATERAL VIEW EXPLODE operand number: %d", operandCount)); + format("Unsupported LATERAL VIEW EXPLODE operand number: %d", operandCount)); // TODO The code below assumes LATERAL VIEW is used with UNNEST EXPLODE/POSEXPLODE only. It should be made more generic. SqlCall unnestCall = tableFunctionCall; SqlNode unnestOperand = unnestCall.operand(0); @@ -222,25 +197,25 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al if (isOuter) { checkState(operandCount > 2, - "LATERAL VIEW OUTER EXPLODE without column aliases is not supported. Add 'AS col' or 'AS key, value' to fix it"); + "LATERAL VIEW OUTER EXPLODE without column aliases is not supported. Add 'AS col' or 'AS key, value' to fix it"); // transforms unnest(b) to unnest( if(b is null or cardinality(b) = 0, ARRAY(null)/MAP(null, null), b)) SqlNode operandIsNull = SqlStdOperatorTable.IS_NOT_NULL.createCall(ZERO, unnestOperand); SqlNode emptyArray = SqlStdOperatorTable.GREATER_THAN.createCall(ZERO, - SqlStdOperatorTable.CARDINALITY.createCall(ZERO, unnestOperand), SqlLiteral.createExactNumeric("0", ZERO)); + SqlStdOperatorTable.CARDINALITY.createCall(ZERO, unnestOperand), SqlLiteral.createExactNumeric("0", ZERO)); SqlNode ifCondition = SqlStdOperatorTable.AND.createCall(ZERO, operandIsNull, emptyArray); // array of [null] or map of (null, null) should be 3rd param to if function. With our type inference, calcite acts // smart and for unnest(array[null]) or unnest(map(null, null)) determines return type to be null SqlNode arrayOrMapOfNull; if (operandCount == 3 - || (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality)) { + || (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality)) { arrayOrMapOfNull = SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR.createCall(ZERO, SqlLiteral.createNull(ZERO)); } else { arrayOrMapOfNull = SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall(ZERO, SqlLiteral.createNull(ZERO), - SqlLiteral.createNull(ZERO)); + SqlLiteral.createNull(ZERO)); } Function hiveIfFunction = functionResolver.tryResolve("if", null, 1); unnestOperand = hiveIfFunction.createCall(SqlLiteral.createCharString("if", ZERO), - ImmutableList.of(ifCondition, unnestOperand, arrayOrMapOfNull), null); + ImmutableList.of(ifCondition, unnestOperand, arrayOrMapOfNull), null); } unnestCall = operator.createCall(ZERO, unnestOperand); @@ -256,7 +231,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al // For POSEXPLODE case, we need to change the order of 2 alias. i.e. 
`pos, val` -> `val, pos` to be aligned with calcite validation if (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality - && operandCount == 4) { + && operandCount == 4) { asOperands.add(aliasOperands.get(1)); asOperands.add(aliasOperands.get(3)); asOperands.add(aliasOperands.get(2)); @@ -266,7 +241,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al SqlNode as = SqlStdOperatorTable.AS.createCall(ZERO, asOperands); return new SqlJoin(ZERO, sqlNodes.get(1), SqlLiteral.createBoolean(false, ZERO), JoinType.COMMA.symbol(ZERO), as, - JoinConditionType.NONE.symbol(ZERO), null); + JoinConditionType.NONE.symbol(ZERO), null); } private SqlNode visitLateralViewJsonTuple(List sqlNodes, List aliasOperands, SqlCall sqlCall) { @@ -295,34 +270,34 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b // '$["jsonKey"]' SqlCall jsonPath = SqlStdOperatorTable.CONCAT.createCall(ZERO, - SqlStdOperatorTable.CONCAT.createCall(ZERO, SqlLiteral.createCharString("$[\"", ZERO), jsonKey), - SqlLiteral.createCharString("\"]", ZERO)); + SqlStdOperatorTable.CONCAT.createCall(ZERO, SqlLiteral.createCharString("$[\"", ZERO), jsonKey), + SqlLiteral.createCharString("\"]", ZERO)); SqlCall getJsonObjectCall = - getJsonObjectFunction.createCall(SqlLiteral.createCharString(getJsonObjectFunction.getFunctionName(), ZERO), - ImmutableList.of(jsonInput, jsonPath), null); + getJsonObjectFunction.createCall(SqlLiteral.createCharString(getJsonObjectFunction.getFunctionName(), ZERO), + ImmutableList.of(jsonInput, jsonPath), null); // TODO Hive get_json_object returns a string, but currently is mapped in Trino to json_extract which returns a json. Once fixed, remove the CAST SqlCall castToString = SqlStdOperatorTable.CAST.createCall(ZERO, getJsonObjectCall, - // TODO This results in CAST to VARCHAR(65535), which may be too short, but there seems to be no way to avoid that. - // even `new SqlDataTypeSpec(new SqlBasicTypeNameSpec(SqlTypeName.VARCHAR, Integer.MAX_VALUE - 1, ZERO), ZERO)` results in a limited VARCHAR precision. - createBasicTypeSpec(SqlTypeName.VARCHAR)); + // TODO This results in CAST to VARCHAR(65535), which may be too short, but there seems to be no way to avoid that. + // even `new SqlDataTypeSpec(new SqlBasicTypeNameSpec(SqlTypeName.VARCHAR, Integer.MAX_VALUE - 1, ZERO), ZERO)` results in a limited VARCHAR precision. + createBasicTypeSpec(SqlTypeName.VARCHAR)); // TODO support jsonKey containing a quotation mark (") or backslash (\) SqlCall ifCondition = - HiveRLikeOperator.RLIKE.createCall(ZERO, jsonKey, SqlLiteral.createCharString("^[^\\\"]*$", ZERO)); + HiveRLikeOperator.RLIKE.createCall(ZERO, jsonKey, SqlLiteral.createCharString("^[^\\\"]*$", ZERO)); SqlCall ifFunctionCall = ifFunction.createCall(SqlLiteral.createCharString(ifFunction.getFunctionName(), ZERO), - ImmutableList.of(ifCondition, castToString, SqlLiteral.createNull(ZERO)), null); + ImmutableList.of(ifCondition, castToString, SqlLiteral.createNull(ZERO)), null); SqlNode projection = ifFunctionCall; // Currently only explicit aliasing is supported. Implicit alias would be c0, c1, etc. 
projections.add(SqlStdOperatorTable.AS.createCall(ZERO, projection, keyAlias)); } SqlNode select = - new SqlSelect(ZERO, null, new SqlNodeList(projections, ZERO), null, null, null, null, null, null, null, null); + new SqlSelect(ZERO, null, new SqlNodeList(projections, ZERO), null, null, null, null, null, null, null, null); SqlNode lateral = SqlStdOperatorTable.LATERAL.createCall(ZERO, select); SqlCall lateralAlias = SqlStdOperatorTable.AS.createCall(ZERO, - ImmutableList. builder().add(lateral).addAll(aliasOperands.subList(1, aliasOperands.size())).build()); + ImmutableList. builder().add(lateral).addAll(aliasOperands.subList(1, aliasOperands.size())).build()); SqlNode joinNode = new SqlJoin(ZERO, sqlNodes.get(1), SqlLiteral.createBoolean(false, ZERO), - JoinType.COMMA.symbol(ZERO), lateralAlias, JoinConditionType.NONE.symbol(ZERO), null); + JoinType.COMMA.symbol(ZERO), lateralAlias, JoinConditionType.NONE.symbol(ZERO), null); return joinNode; } @@ -369,7 +344,7 @@ private SqlNode processJoin(ASTNode node, ParseContext ctx, JoinType joinType) { } return new SqlJoin(ZERO, children.get(0), SqlLiteral.createBoolean(false, ZERO), joinType.symbol(ZERO), - children.get(1), conditionType.symbol(ZERO), condition); + children.get(1), conditionType.symbol(ZERO), condition); } @Override @@ -483,7 +458,7 @@ protected SqlNode visitOperator(ASTNode node, ParseContext ctx) { return visitBinaryOperator(node, ctx); } else { throw new RuntimeException( - String.format("Unhandled AST operator: %s with > 2 children, tree: %s", node.getText(), node.dump())); + String.format("Unhandled AST operator: %s with > 2 children, tree: %s", node.getText(), node.dump())); } } @@ -525,7 +500,7 @@ protected SqlNode visitLParen(ASTNode node, ParseContext ctx) { protected SqlNode visitFunctionStar(ASTNode node, ParseContext ctx) { ASTNode functionNode = (ASTNode) node.getChildren().get(0); List functions = SqlStdOperatorTable.instance().getOperatorList().stream() - .filter(f -> functionNode.getText().equalsIgnoreCase(f.getName())).collect(Collectors.toList()); + .filter(f -> functionNode.getText().equalsIgnoreCase(f.getName())).collect(Collectors.toList()); checkState(functions.size() == 1); return new SqlBasicCall(functions.get(0), new SqlNode[] { new SqlIdentifier("", ZERO) }, ZERO); } @@ -547,8 +522,8 @@ private SqlNode visitFunctionInternal(ASTNode node, ParseContext ctx, SqlLiteral String functionName = functionNode.getText(); List sqlOperands = visitChildren(children, ctx); Function hiveFunction = functionResolver.tryResolve(functionName, ctx.hiveTable.orElse(null), - // The first element of sqlOperands is the operator itself. The actual # of operands is sqlOperands.size() - 1 - sqlOperands.size() - 1); + // The first element of sqlOperands is the operator itself. 
The actual # of operands is sqlOperands.size() - 1 + sqlOperands.size() - 1); // Special treatment for Window Function SqlNode lastSqlOperand = sqlOperands.get(sqlOperands.size() - 1); @@ -561,7 +536,7 @@ private SqlNode visitFunctionInternal(ASTNode node, ParseContext ctx, SqlLiteral // SqlBasicCall("OVER") will have 2 children: "func" and SqlWindow /** See {@link #visitWindowSpec(ASTNode, ParseContext)} for SQL, AST Tree and SqlNode Tree examples */ SqlNode func = - hiveFunction.createCall(sqlOperands.get(0), sqlOperands.subList(1, sqlOperands.size() - 1), quantifier); + hiveFunction.createCall(sqlOperands.get(0), sqlOperands.subList(1, sqlOperands.size() - 1), quantifier); SqlNode window = lastSqlOperand; return new SqlBasicCall(SqlStdOperatorTable.OVER, new SqlNode[] { func, window }, ZERO); } @@ -628,11 +603,24 @@ protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx){ case HiveParser.TOK_QUERY: ctOptions.query = visitQueryNode(ast, ctx); break; + case HiveParser.TOK_TABLESERIALIZER: + ctOptions.tableSerializer = visitTableSerializer(ast, ctx); + break; + case HiveParser.TOK_TABLEFILEFORMAT: + ctOptions.tableFileFormat = (SqlNodeList) visitTableFileFormat(ast, ctx); + break; + case HiveParser.TOK_FILEFORMAT_GENERIC: + ctOptions.tableFileFormat = (SqlNodeList) visitFileFormatGeneric(ast, ctx); + break; + case HiveParser.TOK_TABLEROWFORMAT: + ctOptions.tableRowFormat = (SqlCharStringLiteral) visitTableRowFormat(ast, ctx); + break; default: break; } } - return new SqlCreateTable(ZERO, false, ctOptions.ifNotExists != null ? ctOptions.ifNotExists : false, ctOptions.name, ctOptions.columnList, ctOptions.query); + return new SqlCreateTable(ZERO, false, ctOptions.ifNotExists != null ? ctOptions.ifNotExists : false, ctOptions.name, + ctOptions.columnList, ctOptions.query, ctOptions.tableSerializer, ctOptions.tableFileFormat, ctOptions.tableRowFormat); } @Override @@ -646,10 +634,47 @@ protected SqlNode visitColumn(ASTNode node, ParseContext ctx) { return visitChildren(node, ctx).get(0); } + @Override protected SqlNode visitIfNotExists(ASTNode node, ParseContext ctx) { return SqlLiteral.createBoolean(true, ZERO); } + @Override + protected SqlNode visitTableRowFormat(ASTNode node, ParseContext ctx){ + return visitChildren(node, ctx).get(0); + } + + @Override + protected SqlNode visitSerdeName(ASTNode node, ParseContext ctx) { + return visit((ASTNode) node.getChildren().get(0), ctx); + } + + @Override + protected SqlNode visitTableSerializer(ASTNode node, ParseContext ctx) { + return visitChildren(node, ctx).get(0); + } + + @Override + protected SqlNode visitTableFileFormat(ASTNode node, ParseContext ctx) { + List sqlNodeList = visitChildren(node, ctx); + return new SqlNodeList(sqlNodeList, ZERO); + } + + @Override + protected SqlNode visitFileFormatGeneric(ASTNode node, ParseContext ctx) { + return new SqlNodeList(Arrays.asList(visitChildren(node, ctx).get(0)), ZERO); + } + + @Override + protected SqlNode visitSerdeProps(ASTNode node, ParseContext ctx) { + return visitChildren(node, ctx).get(0); + } + + @Override + protected SqlNode visitTableRowFormatField(ASTNode node, ParseContext ctx) { + return visitChildren(node, ctx).get(0); + } + @Override protected SqlNode visitTabRefNode(ASTNode node, ParseContext ctx) { List sqlNodes = visitChildren(node, ctx); @@ -668,7 +693,7 @@ protected SqlNode visitTabRefNode(ASTNode node, ParseContext ctx) { protected SqlNode visitTabnameNode(ASTNode node, ParseContext ctx) { List sqlNodes = visitChildren(node, ctx); List names = - 
sqlNodes.stream().map(s -> ((SqlIdentifier) s).names).flatMap(List::stream).collect(Collectors.toList()); + sqlNodes.stream().map(s -> ((SqlIdentifier) s).names).flatMap(List::stream).collect(Collectors.toList()); return new SqlIdentifier(names, ZERO); } @@ -772,7 +797,7 @@ protected SqlNode visitQueryNode(ASTNode node, ParseContext ctx) { } } SqlSelect select = new SqlSelect(ZERO, qc.keywords, qc.selects, qc.from, qc.where, qc.grpBy, qc.having, null, - qc.orderBy, null, qc.fetch); + qc.orderBy, null, qc.fetch); if (cte != null) { // Calcite uses "SqlWith(SqlNodeList of SqlWithItem, SqlSelect)" to represent queries with WITH /** See {@link #visitCTE(ASTNode, ParseContext) visitCTE} for details */ @@ -846,8 +871,8 @@ protected SqlNode visitDecimal(ASTNode node, ParseContext ctx) { if (node.getChildCount() == 2) { try { final SqlTypeNameSpec typeNameSpec = new SqlBasicTypeNameSpec(SqlTypeName.DECIMAL, - Integer.parseInt(((ASTNode) node.getChildren().get(0)).getText()), - Integer.parseInt(((ASTNode) node.getChildren().get(1)).getText()), ZERO); + Integer.parseInt(((ASTNode) node.getChildren().get(0)).getText()), + Integer.parseInt(((ASTNode) node.getChildren().get(1)).getText()), ZERO); return new SqlDataTypeSpec(typeNameSpec, ZERO); } catch (NumberFormatException e) { return createBasicTypeSpec(SqlTypeName.DECIMAL); @@ -965,9 +990,9 @@ protected SqlNode visitWindowSpec(ASTNode node, ParseContext ctx) { SqlWindow window = windowRange != null ? windowRange : windowValues; return new SqlWindow(ZERO, null, null, partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getPartitionList(), - partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getOrderList(), - SqlLiteral.createBoolean(windowRange != null, ZERO), window == null ? null : window.getLowerBound(), - window == null ? null : window.getUpperBound(), null); + partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getOrderList(), + SqlLiteral.createBoolean(windowRange != null, ZERO), window == null ? null : window.getLowerBound(), + window == null ? null : window.getUpperBound(), null); } @Override @@ -975,7 +1000,7 @@ protected SqlNode visitPartitioningSpec(ASTNode node, ParseContext ctx) { SqlNode partitionList = visitOptionalChildByType(node, ctx, HiveParser.TOK_DISTRIBUTEBY); SqlNode orderList = visitOptionalChildByType(node, ctx, HiveParser.TOK_ORDERBY); return new SqlWindow(ZERO, null, null, partitionList != null ? (SqlNodeList) partitionList : SqlNodeList.EMPTY, - orderList != null ? (SqlNodeList) orderList : SqlNodeList.EMPTY, null, null, null, null); + orderList != null ? 
(SqlNodeList) orderList : SqlNodeList.EMPTY, null, null, null, null); } @Override @@ -1107,5 +1132,8 @@ class CreateTableOptions { SqlNodeList columnList; SqlNode query; Boolean ifNotExists; + SqlNode tableSerializer; + SqlNodeList tableFileFormat; + SqlCharStringLiteral tableRowFormat; } -} +} \ No newline at end of file From a5a207954012ebcd352ab65a319d12a8d13de3c8 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Mon, 31 Oct 2022 22:47:04 +0530 Subject: [PATCH 06/11] Fixing unnecessary IDE lint changes --- .../hive2rel/parsetree/ParseTreeBuilder.java | 96 ++++++++++++------- 1 file changed, 61 insertions(+), 35 deletions(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index 84596f427..bc915cb2e 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -5,17 +5,43 @@ */ package com.linkedin.coral.hive.hive2rel.parsetree; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Arrays; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.avatica.util.TimeUnit; -import org.apache.calcite.sql.*; +import org.apache.calcite.sql.JoinConditionType; +import org.apache.calcite.sql.JoinType; +import org.apache.calcite.sql.SqlAsOperator; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlBasicTypeNameSpec; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlDataTypeSpec; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLateralOperator; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlSelectKeyword; +import org.apache.calcite.sql.SqlTypeNameSpec; +import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.hadoop.hive.metastore.api.Table; import com.linkedin.coral.com.google.common.collect.ImmutableList; @@ -160,10 +186,10 @@ private SqlNode visitLateralViewInternal(ASTNode node, ParseContext ctx, boolean */ private SqlNode visitLateralViewUDTF(List sqlNodes, List aliasOperands, SqlCall tableFunctionCall) { SqlNode lateralCall = SqlStdOperatorTable.LATERAL.createCall(ZERO, - new SqlLateralOperator(SqlKind.COLLECTION_TABLE).createCall(ZERO, tableFunctionCall)); + new SqlLateralOperator(SqlKind.COLLECTION_TABLE).createCall(ZERO, tableFunctionCall)); final String functionName = tableFunctionCall.getOperator().getName(); ImmutableList fieldNames = - StaticHiveFunctionRegistry.UDTF_RETURN_FIELD_NAME_MAP.getOrDefault(functionName, null); + StaticHiveFunctionRegistry.UDTF_RETURN_FIELD_NAME_MAP.getOrDefault(functionName, null); if 
(fieldNames == null) { throw new RuntimeException("User defined table function " + functionName + " is not registered."); } @@ -173,11 +199,11 @@ private SqlNode visitLateralViewUDTF(List sqlNodes, List alias fieldNames.forEach(name -> asOperands.add(new SqlIdentifier(name, ZERO))); SqlCall aliasCall = SqlStdOperatorTable.AS.createCall(ZERO, asOperands); return new SqlJoin(ZERO, sqlNodes.get(1), SqlLiteral.createBoolean(false, ZERO), JoinType.COMMA.symbol(ZERO), - aliasCall/*lateralCall*/, JoinConditionType.NONE.symbol(ZERO), null); + aliasCall/*lateralCall*/, JoinConditionType.NONE.symbol(ZERO), null); } private SqlNode visitLateralViewExplode(List sqlNodes, List aliasOperands, - SqlCall tableFunctionCall, boolean isOuter) { + SqlCall tableFunctionCall, boolean isOuter) { final int operandCount = aliasOperands.size(); // explode array if operandCount == 3: LATERAL VIEW EXPLODE(op0) op1 AS op2 // explode map if operandCount == 4: LATERAL VIEW EXPLODE(op0) op1 AS op2, op3 @@ -189,7 +215,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al // Note that `operandCount == 2 && isOuter` is not supported yet due to the lack of type information needed // to derive the correct IF function parameters. checkState(operandCount == 2 || operandCount == 3 || operandCount == 4, - format("Unsupported LATERAL VIEW EXPLODE operand number: %d", operandCount)); + format("Unsupported LATERAL VIEW EXPLODE operand number: %d", operandCount)); // TODO The code below assumes LATERAL VIEW is used with UNNEST EXPLODE/POSEXPLODE only. It should be made more generic. SqlCall unnestCall = tableFunctionCall; SqlNode unnestOperand = unnestCall.operand(0); @@ -197,25 +223,25 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al if (isOuter) { checkState(operandCount > 2, - "LATERAL VIEW OUTER EXPLODE without column aliases is not supported. Add 'AS col' or 'AS key, value' to fix it"); + "LATERAL VIEW OUTER EXPLODE without column aliases is not supported. Add 'AS col' or 'AS key, value' to fix it"); // transforms unnest(b) to unnest( if(b is null or cardinality(b) = 0, ARRAY(null)/MAP(null, null), b)) SqlNode operandIsNull = SqlStdOperatorTable.IS_NOT_NULL.createCall(ZERO, unnestOperand); SqlNode emptyArray = SqlStdOperatorTable.GREATER_THAN.createCall(ZERO, - SqlStdOperatorTable.CARDINALITY.createCall(ZERO, unnestOperand), SqlLiteral.createExactNumeric("0", ZERO)); + SqlStdOperatorTable.CARDINALITY.createCall(ZERO, unnestOperand), SqlLiteral.createExactNumeric("0", ZERO)); SqlNode ifCondition = SqlStdOperatorTable.AND.createCall(ZERO, operandIsNull, emptyArray); // array of [null] or map of (null, null) should be 3rd param to if function. 
With our type inference, calcite acts // smart and for unnest(array[null]) or unnest(map(null, null)) determines return type to be null SqlNode arrayOrMapOfNull; if (operandCount == 3 - || (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality)) { + || (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality)) { arrayOrMapOfNull = SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR.createCall(ZERO, SqlLiteral.createNull(ZERO)); } else { arrayOrMapOfNull = SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall(ZERO, SqlLiteral.createNull(ZERO), - SqlLiteral.createNull(ZERO)); + SqlLiteral.createNull(ZERO)); } Function hiveIfFunction = functionResolver.tryResolve("if", null, 1); unnestOperand = hiveIfFunction.createCall(SqlLiteral.createCharString("if", ZERO), - ImmutableList.of(ifCondition, unnestOperand, arrayOrMapOfNull), null); + ImmutableList.of(ifCondition, unnestOperand, arrayOrMapOfNull), null); } unnestCall = operator.createCall(ZERO, unnestOperand); @@ -241,7 +267,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al SqlNode as = SqlStdOperatorTable.AS.createCall(ZERO, asOperands); return new SqlJoin(ZERO, sqlNodes.get(1), SqlLiteral.createBoolean(false, ZERO), JoinType.COMMA.symbol(ZERO), as, - JoinConditionType.NONE.symbol(ZERO), null); + JoinConditionType.NONE.symbol(ZERO), null); } private SqlNode visitLateralViewJsonTuple(List sqlNodes, List aliasOperands, SqlCall sqlCall) { @@ -270,12 +296,12 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b // '$["jsonKey"]' SqlCall jsonPath = SqlStdOperatorTable.CONCAT.createCall(ZERO, - SqlStdOperatorTable.CONCAT.createCall(ZERO, SqlLiteral.createCharString("$[\"", ZERO), jsonKey), - SqlLiteral.createCharString("\"]", ZERO)); + SqlStdOperatorTable.CONCAT.createCall(ZERO, SqlLiteral.createCharString("$[\"", ZERO), jsonKey), + SqlLiteral.createCharString("\"]", ZERO)); SqlCall getJsonObjectCall = - getJsonObjectFunction.createCall(SqlLiteral.createCharString(getJsonObjectFunction.getFunctionName(), ZERO), - ImmutableList.of(jsonInput, jsonPath), null); + getJsonObjectFunction.createCall(SqlLiteral.createCharString(getJsonObjectFunction.getFunctionName(), ZERO), + ImmutableList.of(jsonInput, jsonPath), null); // TODO Hive get_json_object returns a string, but currently is mapped in Trino to json_extract which returns a json. Once fixed, remove the CAST SqlCall castToString = SqlStdOperatorTable.CAST.createCall(ZERO, getJsonObjectCall, // TODO This results in CAST to VARCHAR(65535), which may be too short, but there seems to be no way to avoid that. @@ -283,9 +309,9 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b createBasicTypeSpec(SqlTypeName.VARCHAR)); // TODO support jsonKey containing a quotation mark (") or backslash (\) SqlCall ifCondition = - HiveRLikeOperator.RLIKE.createCall(ZERO, jsonKey, SqlLiteral.createCharString("^[^\\\"]*$", ZERO)); + HiveRLikeOperator.RLIKE.createCall(ZERO, jsonKey, SqlLiteral.createCharString("^[^\\\"]*$", ZERO)); SqlCall ifFunctionCall = ifFunction.createCall(SqlLiteral.createCharString(ifFunction.getFunctionName(), ZERO), - ImmutableList.of(ifCondition, castToString, SqlLiteral.createNull(ZERO)), null); + ImmutableList.of(ifCondition, castToString, SqlLiteral.createNull(ZERO)), null); SqlNode projection = ifFunctionCall; // Currently only explicit aliasing is supported. Implicit alias would be c0, c1, etc. 
projections.add(SqlStdOperatorTable.AS.createCall(ZERO, projection, keyAlias)); @@ -295,9 +321,9 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b new SqlSelect(ZERO, null, new SqlNodeList(projections, ZERO), null, null, null, null, null, null, null, null); SqlNode lateral = SqlStdOperatorTable.LATERAL.createCall(ZERO, select); SqlCall lateralAlias = SqlStdOperatorTable.AS.createCall(ZERO, - ImmutableList. builder().add(lateral).addAll(aliasOperands.subList(1, aliasOperands.size())).build()); + ImmutableList. builder().add(lateral).addAll(aliasOperands.subList(1, aliasOperands.size())).build()); SqlNode joinNode = new SqlJoin(ZERO, sqlNodes.get(1), SqlLiteral.createBoolean(false, ZERO), - JoinType.COMMA.symbol(ZERO), lateralAlias, JoinConditionType.NONE.symbol(ZERO), null); + JoinType.COMMA.symbol(ZERO), lateralAlias, JoinConditionType.NONE.symbol(ZERO), null); return joinNode; } @@ -344,7 +370,7 @@ private SqlNode processJoin(ASTNode node, ParseContext ctx, JoinType joinType) { } return new SqlJoin(ZERO, children.get(0), SqlLiteral.createBoolean(false, ZERO), joinType.symbol(ZERO), - children.get(1), conditionType.symbol(ZERO), condition); + children.get(1), conditionType.symbol(ZERO), condition); } @Override @@ -458,7 +484,7 @@ protected SqlNode visitOperator(ASTNode node, ParseContext ctx) { return visitBinaryOperator(node, ctx); } else { throw new RuntimeException( - String.format("Unhandled AST operator: %s with > 2 children, tree: %s", node.getText(), node.dump())); + String.format("Unhandled AST operator: %s with > 2 children, tree: %s", node.getText(), node.dump())); } } @@ -500,7 +526,7 @@ protected SqlNode visitLParen(ASTNode node, ParseContext ctx) { protected SqlNode visitFunctionStar(ASTNode node, ParseContext ctx) { ASTNode functionNode = (ASTNode) node.getChildren().get(0); List functions = SqlStdOperatorTable.instance().getOperatorList().stream() - .filter(f -> functionNode.getText().equalsIgnoreCase(f.getName())).collect(Collectors.toList()); + .filter(f -> functionNode.getText().equalsIgnoreCase(f.getName())).collect(Collectors.toList()); checkState(functions.size() == 1); return new SqlBasicCall(functions.get(0), new SqlNode[] { new SqlIdentifier("", ZERO) }, ZERO); } @@ -522,8 +548,8 @@ private SqlNode visitFunctionInternal(ASTNode node, ParseContext ctx, SqlLiteral String functionName = functionNode.getText(); List sqlOperands = visitChildren(children, ctx); Function hiveFunction = functionResolver.tryResolve(functionName, ctx.hiveTable.orElse(null), - // The first element of sqlOperands is the operator itself. The actual # of operands is sqlOperands.size() - 1 - sqlOperands.size() - 1); + // The first element of sqlOperands is the operator itself. 
The actual # of operands is sqlOperands.size() - 1 + sqlOperands.size() - 1); // Special treatment for Window Function SqlNode lastSqlOperand = sqlOperands.get(sqlOperands.size() - 1); @@ -536,7 +562,7 @@ private SqlNode visitFunctionInternal(ASTNode node, ParseContext ctx, SqlLiteral // SqlBasicCall("OVER") will have 2 children: "func" and SqlWindow /** See {@link #visitWindowSpec(ASTNode, ParseContext)} for SQL, AST Tree and SqlNode Tree examples */ SqlNode func = - hiveFunction.createCall(sqlOperands.get(0), sqlOperands.subList(1, sqlOperands.size() - 1), quantifier); + hiveFunction.createCall(sqlOperands.get(0), sqlOperands.subList(1, sqlOperands.size() - 1), quantifier); SqlNode window = lastSqlOperand; return new SqlBasicCall(SqlStdOperatorTable.OVER, new SqlNode[] { func, window }, ZERO); } @@ -693,7 +719,7 @@ protected SqlNode visitTabRefNode(ASTNode node, ParseContext ctx) { protected SqlNode visitTabnameNode(ASTNode node, ParseContext ctx) { List sqlNodes = visitChildren(node, ctx); List names = - sqlNodes.stream().map(s -> ((SqlIdentifier) s).names).flatMap(List::stream).collect(Collectors.toList()); + sqlNodes.stream().map(s -> ((SqlIdentifier) s).names).flatMap(List::stream).collect(Collectors.toList()); return new SqlIdentifier(names, ZERO); } @@ -797,7 +823,7 @@ protected SqlNode visitQueryNode(ASTNode node, ParseContext ctx) { } } SqlSelect select = new SqlSelect(ZERO, qc.keywords, qc.selects, qc.from, qc.where, qc.grpBy, qc.having, null, - qc.orderBy, null, qc.fetch); + qc.orderBy, null, qc.fetch); if (cte != null) { // Calcite uses "SqlWith(SqlNodeList of SqlWithItem, SqlSelect)" to represent queries with WITH /** See {@link #visitCTE(ASTNode, ParseContext) visitCTE} for details */ @@ -871,8 +897,8 @@ protected SqlNode visitDecimal(ASTNode node, ParseContext ctx) { if (node.getChildCount() == 2) { try { final SqlTypeNameSpec typeNameSpec = new SqlBasicTypeNameSpec(SqlTypeName.DECIMAL, - Integer.parseInt(((ASTNode) node.getChildren().get(0)).getText()), - Integer.parseInt(((ASTNode) node.getChildren().get(1)).getText()), ZERO); + Integer.parseInt(((ASTNode) node.getChildren().get(0)).getText()), + Integer.parseInt(((ASTNode) node.getChildren().get(1)).getText()), ZERO); return new SqlDataTypeSpec(typeNameSpec, ZERO); } catch (NumberFormatException e) { return createBasicTypeSpec(SqlTypeName.DECIMAL); @@ -990,9 +1016,9 @@ protected SqlNode visitWindowSpec(ASTNode node, ParseContext ctx) { SqlWindow window = windowRange != null ? windowRange : windowValues; return new SqlWindow(ZERO, null, null, partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getPartitionList(), - partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getOrderList(), - SqlLiteral.createBoolean(windowRange != null, ZERO), window == null ? null : window.getLowerBound(), - window == null ? null : window.getUpperBound(), null); + partitionSpec == null ? SqlNodeList.EMPTY : partitionSpec.getOrderList(), + SqlLiteral.createBoolean(windowRange != null, ZERO), window == null ? null : window.getLowerBound(), + window == null ? null : window.getUpperBound(), null); } @Override @@ -1000,7 +1026,7 @@ protected SqlNode visitPartitioningSpec(ASTNode node, ParseContext ctx) { SqlNode partitionList = visitOptionalChildByType(node, ctx, HiveParser.TOK_DISTRIBUTEBY); SqlNode orderList = visitOptionalChildByType(node, ctx, HiveParser.TOK_ORDERBY); return new SqlWindow(ZERO, null, null, partitionList != null ? (SqlNodeList) partitionList : SqlNodeList.EMPTY, - orderList != null ? 
(SqlNodeList) orderList : SqlNodeList.EMPTY, null, null, null, null); } @Override From d76b2c63ff4e59c5c032cf5d9a2c0063df2e192b Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Mon, 31 Oct 2022 23:14:23 +0530 Subject: [PATCH 07/11] fixing unnecessary IDE lint changes --- .../com/linkedin/coral/common/ToRelConverter.java | 3 +-- .../hive/hive2rel/parsetree/ParseTreeBuilder.java | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index 8cb0c2855..b9000cec6 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -160,8 +160,7 @@ public SqlNode processView(String dbName, String tableName) { public RelNode toRel(SqlNode sqlNode) { RelRoot root = getSqlToRelConverter().convertQuery(sqlNode, true, true); - RelNode relNode = standardizeRel(root.rel); - return relNode; + return standardizeRel(root.rel); } /** diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index bc915cb2e..9d2db7c94 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -203,7 +203,7 @@ private SqlNode visitLateralViewUDTF(List sqlNodes, List alias } private SqlNode visitLateralViewExplode(List sqlNodes, List aliasOperands, - SqlCall tableFunctionCall, boolean isOuter) { + SqlCall tableFunctionCall, boolean isOuter) { final int operandCount = aliasOperands.size(); // explode array if operandCount == 3: LATERAL VIEW EXPLODE(op0) op1 AS op2 // explode map if operandCount == 4: LATERAL VIEW EXPLODE(op0) op1 AS op2, op3 @@ -257,7 +257,7 @@ private SqlNode visitLateralViewExplode(List sqlNodes, List al // For POSEXPLODE case, we need to change the order of 2 alias. i.e. `pos, val` -> `val, pos` to be aligned with calcite validation if (operator instanceof CoralSqlUnnestOperator && ((CoralSqlUnnestOperator) operator).withOrdinality - && operandCount == 4) { + && operandCount == 4) { asOperands.add(aliasOperands.get(1)); asOperands.add(aliasOperands.get(3)); asOperands.add(aliasOperands.get(2)); @@ -304,9 +304,9 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b ImmutableList.of(jsonInput, jsonPath), null); // TODO Hive get_json_object returns a string, but currently is mapped in Trino to json_extract which returns a json. Once fixed, remove the CAST SqlCall castToString = SqlStdOperatorTable.CAST.createCall(ZERO, getJsonObjectCall, - // TODO This results in CAST to VARCHAR(65535), which may be too short, but there seems to be no way to avoid that. - // even `new SqlDataTypeSpec(new SqlBasicTypeNameSpec(SqlTypeName.VARCHAR, Integer.MAX_VALUE - 1, ZERO), ZERO)` results in a limited VARCHAR precision. + // TODO This results in CAST to VARCHAR(65535), which may be too short, but there seems to be no way to avoid that. + // even `new SqlDataTypeSpec(new SqlBasicTypeNameSpec(SqlTypeName.VARCHAR, Integer.MAX_VALUE - 1, ZERO), ZERO)` results in a limited VARCHAR precision. 
+ createBasicTypeSpec(SqlTypeName.VARCHAR)); // TODO support jsonKey containing a quotation mark (") or backslash (\) SqlCall ifCondition = HiveRLikeOperator.RLIKE.createCall(ZERO, jsonKey, SqlLiteral.createCharString("^[^\\\"]*$", ZERO)); @@ -318,7 +318,7 @@ LATERAL VIEW json_tuple(json, p1, p2) jt AS a, b } SqlNode select = - new SqlSelect(ZERO, null, new SqlNodeList(projections, ZERO), null, null, null, null, null, null, null, null); + new SqlSelect(ZERO, null, new SqlNodeList(projections, ZERO), null, null, null, null, null, null, null, null); SqlNode lateral = SqlStdOperatorTable.LATERAL.createCall(ZERO, select); SqlCall lateralAlias = SqlStdOperatorTable.AS.createCall(ZERO, ImmutableList. builder().add(lateral).addAll(aliasOperands.subList(1, aliasOperands.size())).build()); From 51b5451b7d1f70ed6a2ff5d32e1482e41471e196 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Thu, 3 Nov 2022 19:13:47 +0530 Subject: [PATCH 08/11] write UT for parse tree builder CTAS --- .../common/calcite/sql/SqlCreateTable.java | 62 +++++++++++-------- .../parsetree/ParseTreeBuilderTest.java | 19 +++++- .../controller/TranslationController.java | 13 ---- .../coralservice/utils/TranslationUtils.java | 6 -- .../com/linkedin/coral/spark/CoralSpark.java | 7 ++- 5 files changed, 58 insertions(+), 49 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java index 846d6e9f6..d52e3f3b2 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -8,32 +8,40 @@ import java.util.List; import java.util.Objects; -public class SqlCreateTable extends SqlCreate { + +public class SqlCreateTable extends SqlCreate implements SqlCommand{ + //name of the table to be created private final SqlIdentifier name; + // column details like column name, data type, etc. This may be null, like in case of CTAS private final @Nullable SqlNodeList columnList; - private @Nullable SqlNode query; - private final @Nullable SqlNode tableSerializer; - private final @Nullable SqlNodeList tableFileFormat; - private final @Nullable SqlCharStringLiteral tableRowFormat; + // select query node in case of "CREATE TABLE ... AS query"; else may be null + private @Nullable SqlNode selectQuery; + // specifying serde property + private final @Nullable SqlNode serDe; + // specifying file format such as Parquet, ORC, etc. + private final @Nullable SqlNodeList fileFormat; + // specifying delimiter fields for row format + private final @Nullable SqlCharStringLiteral rowFormat; private static final SqlOperator OPERATOR = new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); /** Creates a SqlCreateTable. */ public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, - SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode query, SqlNode tableSerializer, SqlNodeList tableFileFormat, SqlCharStringLiteral tableRowFormat) { + SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode selectQuery, + @Nullable SqlNode serDe, @Nullable SqlNodeList fileFormat, @Nullable SqlCharStringLiteral rowFormat) { super(OPERATOR, pos, replace, ifNotExists); this.name = Objects.requireNonNull(name, "name"); - this.columnList = columnList; // may be null, like in case of ctas - this.query = query; // for "CREATE TABLE ... 
AS query"; may be null - this.tableSerializer = tableSerializer; - this.tableFileFormat = tableFileFormat; - this.tableRowFormat = tableRowFormat; + this.columnList = columnList; + this.selectQuery = selectQuery; + this.serDe = serDe; + this.fileFormat = fileFormat; + this.rowFormat = rowFormat; } @SuppressWarnings("nullness") @Override public List getOperandList() { - return ImmutableNullableList.of(name, columnList, query); + return ImmutableNullableList.of(name, columnList, selectQuery, serDe, fileFormat, rowFormat); } @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { @@ -51,41 +59,43 @@ public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, } writer.endList(frame); } - if(tableSerializer != null){ + if(serDe != null){ writer.keyword("ROW FORMAT SERDE"); - tableSerializer.unparse(writer, 0, 0); + serDe.unparse(writer, 0, 0); writer.newlineAndIndent(); } - if(tableRowFormat != null){ + if(rowFormat != null){ writer.keyword("ROW FORMAT DELIMITED FIELDS TERMINATED BY"); - tableRowFormat.unparse(writer, 0, 0); + rowFormat.unparse(writer, 0, 0); writer.newlineAndIndent(); } - if(tableFileFormat != null){ - if(tableFileFormat.size() == 1){ + if(fileFormat != null){ + if(fileFormat.size() == 1){ writer.keyword("STORED AS"); - tableFileFormat.get(0).unparse(writer, 0, 0); + fileFormat.get(0).unparse(writer, 0, 0); writer.newlineAndIndent(); } else { writer.keyword("STORED AS INPUTFORMAT"); - tableFileFormat.get(0).unparse(writer, 0, 0); + fileFormat.get(0).unparse(writer, 0, 0); writer.keyword("OUTPUTFORMAT"); - tableFileFormat.get(1).unparse(writer, 0, 0); + fileFormat.get(1).unparse(writer, 0, 0); writer.newlineAndIndent(); } } - if (query != null) { + if (selectQuery != null) { writer.keyword("AS"); writer.newlineAndIndent(); - query.unparse(writer, 0, 0); + selectQuery.unparse(writer, 0, 0); } } + @Override public SqlNode getSelectQuery() { - return query; + return selectQuery; } - public void setQuery(SqlNode query) { - this.query = query; + @Override + public void setSelectQuery(SqlNode query) { + this.selectQuery = query; } } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java index e332a0d3d..8d630e281 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java @@ -147,6 +147,7 @@ public Iterator getConvertSql() { "SELECT LAST_VALUE(c) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS min_c FROM foo", "SELECT STDDEV(c) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) AS min_c FROM foo", "SELECT VARIANCE(c) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS min_c FROM foo"); + // We wrap the SQL to be tested here rather than wrap each SQL statement in its own array in the constant return convertSql.stream().map(x -> new Object[] { x }).iterator(); } @@ -193,7 +194,23 @@ public Iterator getValidateSql() { "SELECT CASE WHEN `a` THEN 10 WHEN `b` THEN 20 ELSE 30 END FROM `foo`"), ImmutableList.of("SELECT named_struct('abc', 123, 'def', 234.23) FROM foo", "SELECT `named_struct`('abc', 123, 'def', 234.23) FROM `foo`"), - ImmutableList.of("SELECT 0L FROM foo", "SELECT 0 FROM `foo`")); + ImmutableList.of("SELECT 0L FROM foo", "SELECT 0 FROM `foo`"), + 
//Basic CTAS query + ImmutableList.of("CREATE TABLE sample AS select * from tmp","CREATE TABLE `sample` AS select * from `tmp`"), + //CTAS query with IF NOT EXISTS keyword + ImmutableList.of("CREATE TABLE IF NOT EXISTS sample AS SELECT * FROM tmp","CREATE TABLE IF NOT EXISTS `sample` AS select * from `tmp`"), + //CTAS query with storage format + ImmutableList.of("CREATE TABLE sample STORED AS ORC AS SELECT * FROM tmp","CREATE TABLE `sample` STORED AS `ORC` AS select * from `tmp`"), + //CTAS query with input and output formats + ImmutableList.of("CREATE TABLE sample STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), + //CTAS query with serde + ImmutableList.of("CREATE TABLE sample ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), + //CTAS query with row format delimiter fields + ImmutableList.of("CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`")); return convertAndValidateSql.stream().map(x -> new Object[] { x.get(0), x.get(1) }).iterator(); } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java index 3ccb38ac0..6d0aa67b3 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java @@ -65,7 +65,6 @@ public ResponseEntity translate(@RequestBody TranslateRequestBody translateReque translatedSql = translateTrinoToSpark(query); } } -<<<<<<< HEAD // From Hive else if (fromLanguage.equalsIgnoreCase("hive")) { // To Spark if (toLanguage.equalsIgnoreCase("spark")) { translatedSql = translateHiveQueryToSparkSql(query); } // To Trino else if (toLanguage.equalsIgnoreCase("trino")) { translatedSql = translateHiveToTrino(query); } -======= - } - // From Hive - else if (fromLanguage.equalsIgnoreCase("hive")) { - // To Spark - if (toLanguage.equalsIgnoreCase("spark")) { - translatedSql = translateHiveQueryToSparkSql(query); - } - // To Trino - else if (toLanguage.equalsIgnoreCase("trino")) { - translatedSql = translateHiveToTrino(query); ->>>>>>> c5df021 (Adding translation logic Coral Spark and adding node of interest logic) } } catch (Throwable t) { t.printStackTrace(); diff --git 
a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index da720eea1..c980b8205 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -32,12 +32,6 @@ public static String translateHiveToTrino(String query) { } public static String translateHiveToSpark(String query) { - RelNode relNode = hiveToRelConverter.convertSql(query); - CoralSpark coralSpark = CoralSpark.create(relNode); - return coralSpark.getSparkSql(); - } - - public static String translateHiveQueryToSparkSql(String query){ HiveToRelConverter hiveToRelConverter = new HiveToRelConverter(hiveMetastoreClient); SqlNode sqlNode = hiveToRelConverter.toSqlNode(query); Function hiveSqlNodeToRelConverter = hiveToRelConverter::toRel; diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index 1f4763131..404443a1f 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -9,6 +9,7 @@ import java.util.function.Function; import java.util.stream.Collectors; +import com.linkedin.coral.common.calcite.sql.SqlCommand; import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.avro.Schema; import org.apache.calcite.plan.RelOptTable; @@ -72,7 +73,7 @@ public static CoralSpark create(RelNode irRelNode) { List sparkUDFInfos = sparkRelInfo.getSparkUDFInfoList(); return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); } - + /** * Users use this function as the main API for getting CoralSpark instance. * This should be used when user need to align the Coral-spark translated SQL @@ -138,11 +139,11 @@ private static String constructSparkSQL(RelNode sparkRelNode) { public static CoralSpark create(SqlNode sqlNode, Function convertor){ SparkRelInfo sparkRelInfo; //apply RelNode transformations for sqlNode eligible for transformation. 
- if(sqlNode instanceof SqlCreate) { + if(sqlNode instanceof SqlCommand) { SqlNode selectNode = ((SqlCreateTable) sqlNode).getSelectQuery(); sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(selectNode)); selectNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); - ((SqlCreateTable) sqlNode).setQuery(selectNode); + ((SqlCreateTable) sqlNode).setSelectQuery(selectNode); } else { sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(sqlNode)); sqlNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); From ce2df6cc413808095dae1d7390d4807a6184bba5 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Wed, 9 Nov 2022 20:40:28 +0530 Subject: [PATCH 09/11] Refactoring Coral Spark code, adding UTs and Javadoc comments --- .../linkedin/coral/common/ToRelConverter.java | 2 + .../coral/common/calcite/DdlSqlValidator.java | 14 ++ .../coral/common/calcite/sql/SqlCommand.java | 15 +- .../common/calcite/sql/SqlCreateTable.java | 200 ++++++++++-------- .../common/calcite/sql/util/SqlDdlNodes.java | 26 +++ .../hive/hive2rel/HiveToRelConverter.java | 7 +- .../parsetree/AbstractASTVisitor.java | 45 ++-- .../hive2rel/parsetree/ParseTreeBuilder.java | 26 +-- .../validators/HiveDdlSqlValidator.java | 25 +++ .../parsetree/ParseTreeBuilderTest.java | 23 +- .../coralservice/utils/TranslationUtils.java | 21 +- .../com/linkedin/coral/spark/CoralSpark.java | 48 ++++- 12 files changed, 317 insertions(+), 135 deletions(-) create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/calcite/DdlSqlValidator.java create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/util/SqlDdlNodes.java create mode 100644 coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/validators/HiveDdlSqlValidator.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index b9000cec6..40321704b 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -118,6 +118,7 @@ protected ToRelConverter(Map>> localMetaStore) public RelNode convertSql(String sql) { return toRel(toSqlNode(sql)); } + /** * Similar to {@link #convertSql(String)} but converts hive view definition stored * in the hive metastore to corresponding {@link RelNode} implementation. @@ -160,6 +161,7 @@ public SqlNode processView(String dbName, String tableName) { public RelNode toRel(SqlNode sqlNode) { RelRoot root = getSqlToRelConverter().convertQuery(sqlNode, true, true); + return standardizeRel(root.rel); } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/DdlSqlValidator.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/DdlSqlValidator.java new file mode 100644 index 000000000..ffc8576e4 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/DdlSqlValidator.java @@ -0,0 +1,14 @@ +/** + * Copyright 2022 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.common.calcite; + +import org.apache.calcite.sql.SqlNode; + + +public interface DdlSqlValidator { + + void validate(SqlNode ddlSqlNode); +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java index d45ed1c68..6158533dc 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCommand.java @@ -1,10 +1,19 @@ +/** + * Copyright 2022 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ package com.linkedin.coral.common.calcite.sql; import org.apache.calcite.sql.SqlNode; + +/** + * Interface for SqlNodes containing select statements as a child node. Ex: CTAS queries + */ public interface SqlCommand { - public SqlNode getSelectQuery(); + SqlNode getSelectQuery(); - public void setSelectQuery(SqlNode selectQuery); -} \ No newline at end of file + void setSelectQuery(SqlNode selectQuery); +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java index d52e3f3b2..8402dbcfc 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/SqlCreateTable.java @@ -1,101 +1,131 @@ +/** + * Copyright 2022 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ package com.linkedin.coral.common.calcite.sql; -import com.linkedin.coral.javax.annotation.Nullable; +import java.util.List; + import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.util.ImmutableNullableList; -import java.util.List; -import java.util.Objects; +import com.linkedin.coral.javax.annotation.Nonnull; +import com.linkedin.coral.javax.annotation.Nullable; -public class SqlCreateTable extends SqlCreate implements SqlCommand{ - //name of the table to be created - private final SqlIdentifier name; - // column details like column name, data type, etc. This may be null, like in case of CTAS - private final @Nullable SqlNodeList columnList; - // select query node in case of "CREATE TABLE ... AS query"; else may be null - private @Nullable SqlNode selectQuery; - // specifying serde property - private final @Nullable SqlNode serDe; - // specifying file format such as Parquet, ORC, etc. - private final @Nullable SqlNodeList fileFormat; - // specifying delimiter fields for row format - private final @Nullable SqlCharStringLiteral rowFormat; +/** + * SQL parse tree node to represent {@code CREATE} statements, + *

+ * <p>Supported Syntax:
+ *
+ * <blockquote><pre>
+ * CREATE TABLE [ IF NOT EXISTS ] name
+ *   [ROW FORMAT SERDE serde]
+ *   [ROW FORMAT DELIMITED FIELDS TERMINATED BY rowFormat]
+ *   [STORED AS fileFormat]
+ *   [STORED AS INPUTFORMAT inputFormat OUTPUTFORMAT outputFormat]
+ *   [ AS query ]
+ * </pre></blockquote>
+ *
+ * <p>Examples:
+ * <ul>
+ *   <li>CREATE TABLE IF NOT EXISTS sample AS SELECT * FROM tmp</li>
+ *   <li>CREATE TABLE sample STORED AS ORC AS SELECT * FROM tmp</li>
+ *   <li>CREATE TABLE sample STORED AS INPUTFORMAT 'SerDeExampleInputFormat' OUTPUTFORMAT 'SerDeExampleOutputFormat' AS SELECT * FROM tmp</li>
+ *   <li>CREATE TABLE sample ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' AS SELECT * FROM tmp</li>
+ *   <li>CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS SELECT * FROM tmp</li>
+ * </ul>
+ */ +public class SqlCreateTable extends SqlCreate implements SqlCommand { + // name of the table to be created + private final SqlIdentifier name; + // column details like column name, data type, etc. This may be null, like in case of CTAS + private final @Nullable SqlNodeList columnList; + // select query node in case of "CREATE TABLE ... AS query"; else may be null + private @Nullable SqlNode selectQuery; + // specifying serde property + private final @Nullable SqlNode serDe; + // specifying file format such as Parquet, ORC, etc. + private final @Nullable SqlNodeList fileFormat; + // specifying delimiter fields for row format + private final @Nullable SqlCharStringLiteral rowFormat; - private static final SqlOperator OPERATOR = - new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); + private static final SqlOperator OPERATOR = new SqlSpecialOperator("CREATE TABLE", SqlKind.CREATE_TABLE); - /** Creates a SqlCreateTable. */ - public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, - SqlIdentifier name, @Nullable SqlNodeList columnList, @Nullable SqlNode selectQuery, - @Nullable SqlNode serDe, @Nullable SqlNodeList fileFormat, @Nullable SqlCharStringLiteral rowFormat) { - super(OPERATOR, pos, replace, ifNotExists); - this.name = Objects.requireNonNull(name, "name"); - this.columnList = columnList; - this.selectQuery = selectQuery; - this.serDe = serDe; - this.fileFormat = fileFormat; - this.rowFormat = rowFormat; - } + /** Creates a SqlCreateTable. */ + public SqlCreateTable(SqlParserPos pos, boolean replace, boolean ifNotExists, @Nonnull SqlIdentifier name, + @Nullable SqlNodeList columnList, @Nullable SqlNode selectQuery, @Nullable SqlNode serDe, + @Nullable SqlNodeList fileFormat, @Nullable SqlCharStringLiteral rowFormat) { + super(OPERATOR, pos, replace, ifNotExists); + this.name = name; + this.columnList = columnList; + this.selectQuery = selectQuery; + this.serDe = serDe; + this.fileFormat = fileFormat; + this.rowFormat = rowFormat; + } - @SuppressWarnings("nullness") - @Override public List getOperandList() { - return ImmutableNullableList.of(name, columnList, selectQuery, serDe, fileFormat, rowFormat); - } + @SuppressWarnings("nullness") + @Override + public List getOperandList() { + return ImmutableNullableList.of(name, columnList, selectQuery, serDe, fileFormat, rowFormat); + } - @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { - writer.keyword("CREATE"); - writer.keyword("TABLE"); - if (ifNotExists) { - writer.keyword("IF NOT EXISTS"); - } - name.unparse(writer, leftPrec, rightPrec); - if (columnList != null) { - SqlWriter.Frame frame = writer.startList("(", ")"); - for (SqlNode c : columnList) { - writer.sep(","); - c.unparse(writer, 0, 0); - } - writer.endList(frame); - } - if(serDe != null){ - writer.keyword("ROW FORMAT SERDE"); - serDe.unparse(writer, 0, 0); - writer.newlineAndIndent(); - } - if(rowFormat != null){ - writer.keyword("ROW FORMAT DELIMITED FIELDS TERMINATED BY"); - rowFormat.unparse(writer, 0, 0); - writer.newlineAndIndent(); - } - if(fileFormat != null){ - if(fileFormat.size() == 1){ - writer.keyword("STORED AS"); - fileFormat.get(0).unparse(writer, 0, 0); - writer.newlineAndIndent(); - } else { - writer.keyword("STORED AS INPUTFORMAT"); - fileFormat.get(0).unparse(writer, 0, 0); - writer.keyword("OUTPUTFORMAT"); - fileFormat.get(1).unparse(writer, 0, 0); - writer.newlineAndIndent(); - } - } - if (selectQuery != null) { - writer.keyword("AS"); - writer.newlineAndIndent(); - 
selectQuery.unparse(writer, 0, 0); - } + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + writer.keyword("CREATE"); + writer.keyword("TABLE"); + if (ifNotExists) { + writer.keyword("IF NOT EXISTS"); } - - @Override - public SqlNode getSelectQuery() { - return selectQuery; + name.unparse(writer, leftPrec, rightPrec); + if (columnList != null) { + SqlWriter.Frame frame = writer.startList("(", ")"); + for (SqlNode c : columnList) { + writer.sep(","); + c.unparse(writer, 0, 0); + } + writer.endList(frame); } - - @Override - public void setSelectQuery(SqlNode query) { - this.selectQuery = query; + if (serDe != null) { + writer.keyword("ROW FORMAT SERDE"); + serDe.unparse(writer, 0, 0); + writer.newlineAndIndent(); + } + if (rowFormat != null) { + writer.keyword("ROW FORMAT DELIMITED FIELDS TERMINATED BY"); + rowFormat.unparse(writer, 0, 0); + writer.newlineAndIndent(); + } + if (fileFormat != null) { + if (fileFormat.size() == 1) { + writer.keyword("STORED AS"); + fileFormat.get(0).unparse(writer, 0, 0); + } else { + writer.keyword("STORED AS INPUTFORMAT"); + fileFormat.get(0).unparse(writer, 0, 0); + writer.keyword("OUTPUTFORMAT"); + fileFormat.get(1).unparse(writer, 0, 0); + } + writer.newlineAndIndent(); } + if (selectQuery != null) { + writer.keyword("AS"); + writer.newlineAndIndent(); + selectQuery.unparse(writer, 0, 0); + } + } + + @Override + public SqlNode getSelectQuery() { + return selectQuery; + } + + @Override + public void setSelectQuery(SqlNode query) { + this.selectQuery = query; + } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/util/SqlDdlNodes.java b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/util/SqlDdlNodes.java new file mode 100644 index 000000000..f5997bee9 --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/calcite/sql/util/SqlDdlNodes.java @@ -0,0 +1,26 @@ +/** + * Copyright 2022 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common.calcite.sql.util; + +import org.apache.calcite.sql.SqlCharStringLiteral; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.parser.SqlParserPos; + +import com.linkedin.coral.common.calcite.sql.SqlCreateTable; + + +public class SqlDdlNodes { + + /** Creates a CREATE TABLE. 
*/ + public static SqlCreateTable createTable(SqlParserPos pos, boolean replace, boolean ifNotExists, SqlIdentifier name, + SqlNodeList columnList, SqlNode query, SqlNode tableSerializer, SqlNodeList tableFileFormat, + SqlCharStringLiteral tableRowFormat) { + return new SqlCreateTable(pos, replace, ifNotExists, name, columnList, query, tableSerializer, tableFileFormat, + tableRowFormat); + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverter.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverter.java index ad1aaf081..dbe8bb29a 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverter.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverter.java @@ -24,9 +24,11 @@ import com.linkedin.coral.common.HiveMetastoreClient; import com.linkedin.coral.common.ToRelConverter; +import com.linkedin.coral.common.calcite.DdlSqlValidator; import com.linkedin.coral.hive.hive2rel.functions.HiveFunctionResolver; import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry; import com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder; +import com.linkedin.coral.hive.hive2rel.validators.HiveDdlSqlValidator; import static com.linkedin.coral.hive.hive2rel.HiveSqlConformance.HIVE_SQL; @@ -52,6 +54,7 @@ public class HiveToRelConverter extends ToRelConverter { // The validator must be reused SqlValidator sqlValidator = new HiveSqlValidator(getOperatorTable(), getCalciteCatalogReader(), ((JavaTypeFactory) getRelBuilder().getTypeFactory()), HIVE_SQL); + DdlSqlValidator ddlSqlValidator = new HiveDdlSqlValidator(); public HiveToRelConverter(HiveMetastoreClient hiveMetastoreClient) { super(hiveMetastoreClient); @@ -92,7 +95,9 @@ protected SqlToRelConverter getSqlToRelConverter() { @Override protected SqlNode toSqlNode(String sql, Table hiveView) { - return parseTreeBuilder.process(trimParenthesis(sql), hiveView); + SqlNode sqlNode = parseTreeBuilder.process(trimParenthesis(sql), hiveView); + ddlSqlValidator.validate(sqlNode); + return sqlNode; } @Override diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java index b420c07e0..068874844 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/AbstractASTVisitor.java @@ -333,35 +333,56 @@ protected R visit(ASTNode node, C ctx) { } protected R visitTableRowFormatField(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitSerdeProps(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitTableRowFormat(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitSerdeName(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitTableSerializer(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, 
ctx).get(0); + } + return null; } protected R visitTableFileFormat(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitFileFormatGeneric(ASTNode node, C ctx) { - return visitChildren(node, ctx).get(0); + if (node.getChildren() != null) { + return visitChildren(node, ctx).get(0); + } + return null; } protected R visitColumn(ASTNode node, C ctx) { - if(node.getChildren() != null){ + if (node.getChildren() != null) { return visitChildren(node, ctx).get(0); } return null; @@ -372,20 +393,20 @@ protected R visitColumnList(ASTNode node, C ctx) { } protected R visitIfNotExists(ASTNode node, C ctx) { - if(node.getChildren() != null){ + if (node.getChildren() != null) { return visitChildren(node, ctx).get(0); } return null; } - protected R visitLikeTable(ASTNode node, C ctx){ - if(node.getChildren() != null){ + protected R visitLikeTable(ASTNode node, C ctx) { + if (node.getChildren() != null) { return visitChildren(node, ctx).get(0); } return null; } - protected R visitCreateTable(ASTNode node, C ctx){ + protected R visitCreateTable(ASTNode node, C ctx) { return visitChildren(node, ctx).get(0); } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java index 9d2db7c94..750b298f0 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilder.java @@ -6,15 +6,14 @@ package com.linkedin.coral.hive.hive2rel.parsetree; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Optional; -import java.util.Arrays; import java.util.stream.Collectors; import javax.annotation.Nullable; -import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.sql.JoinConditionType; import org.apache.calcite.sql.JoinType; @@ -22,6 +21,7 @@ import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlBasicTypeNameSpec; import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.calcite.sql.SqlDataTypeSpec; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlIntervalQualifier; @@ -41,11 +41,11 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.hadoop.hive.metastore.api.Table; import com.linkedin.coral.com.google.common.collect.ImmutableList; import com.linkedin.coral.com.google.common.collect.Iterables; +import com.linkedin.coral.common.calcite.sql.util.SqlDdlNodes; import com.linkedin.coral.common.functions.CoralSqlUnnestOperator; import com.linkedin.coral.common.functions.Function; import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; @@ -612,11 +612,11 @@ protected SqlNode visitSelect(ASTNode node, ParseContext ctx) { } @Override - protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx){ + protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx) { CreateTableOptions ctOptions = new CreateTableOptions(); - for(Node child: node.getChildren()){ + for (Node child : node.getChildren()) { 
ASTNode ast = (ASTNode) child; - switch (ast.getType()){ + switch (ast.getType()) { case HiveParser.TOK_TABNAME: ctOptions.name = (SqlIdentifier) visitTabnameNode(ast, ctx); break; @@ -645,12 +645,12 @@ protected SqlNode visitCreateTable(ASTNode node, ParseContext ctx){ break; } } - return new SqlCreateTable(ZERO, false, ctOptions.ifNotExists != null ? ctOptions.ifNotExists : false, ctOptions.name, - ctOptions.columnList, ctOptions.query, ctOptions.tableSerializer, ctOptions.tableFileFormat, ctOptions.tableRowFormat); + return SqlDdlNodes.createTable(ZERO, false, ctOptions.ifNotExists, ctOptions.name, ctOptions.columnList, + ctOptions.query, ctOptions.tableSerializer, ctOptions.tableFileFormat, ctOptions.tableRowFormat); } @Override - protected SqlNode visitColumnList(ASTNode node, ParseContext ctx){ + protected SqlNode visitColumnList(ASTNode node, ParseContext ctx) { List sqlNodeList = visitChildren(node, ctx); return new SqlNodeList(sqlNodeList, ZERO); } @@ -666,7 +666,7 @@ protected SqlNode visitIfNotExists(ASTNode node, ParseContext ctx) { } @Override - protected SqlNode visitTableRowFormat(ASTNode node, ParseContext ctx){ + protected SqlNode visitTableRowFormat(ASTNode node, ParseContext ctx) { return visitChildren(node, ctx).get(0); } @@ -1153,13 +1153,13 @@ Optional
getHiveTable() { } } - class CreateTableOptions { + static class CreateTableOptions { SqlIdentifier name; SqlNodeList columnList; SqlNode query; - Boolean ifNotExists; + boolean ifNotExists; SqlNode tableSerializer; SqlNodeList tableFileFormat; SqlCharStringLiteral tableRowFormat; } -} \ No newline at end of file +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/validators/HiveDdlSqlValidator.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/validators/HiveDdlSqlValidator.java new file mode 100644 index 000000000..285f19bf1 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/validators/HiveDdlSqlValidator.java @@ -0,0 +1,25 @@ +/** + * Copyright 2022 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.validators; + +import org.apache.calcite.sql.SqlNode; + +import com.linkedin.coral.common.calcite.DdlSqlValidator; + + +public class HiveDdlSqlValidator implements DdlSqlValidator { + @Override + public void validate(SqlNode ddlSqlNode) { + switch (ddlSqlNode.getKind()) { + case CREATE_TABLE: + validateCreateTable(ddlSqlNode); + } + } + + private void validateCreateTable(SqlNode sqlNode) { + //Todo need to add appropriate validations + } +} diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java index 8d630e281..b94e4d9ed 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/parsetree/ParseTreeBuilderTest.java @@ -197,20 +197,25 @@ public Iterator getValidateSql() { ImmutableList.of("SELECT 0L FROM foo", "SELECT 0 FROM `foo`"), //Basic CTAS query - ImmutableList.of("CREATE TABLE sample AS select * from tmp","CREATE TABLE `sample` AS select * from `tmp`"), + ImmutableList.of("CREATE TABLE sample AS select * from tmp", "CREATE TABLE `sample` AS select * from `tmp`"), //CTAS query with IF NOT EXISTS keyword - ImmutableList.of("CREATE TABLE IF NOT EXISTS sample AS SELECT * FROM tmp","CREATE TABLE IF NOT EXISTS `sample` AS select * from `tmp`"), + ImmutableList.of("CREATE TABLE IF NOT EXISTS sample AS SELECT * FROM tmp", + "CREATE TABLE IF NOT EXISTS `sample` AS select * from `tmp`"), //CTAS query with storage format - ImmutableList.of("CREATE TABLE sample STORED AS ORC AS SELECT * FROM tmp","CREATE TABLE `sample` STORED AS `ORC` AS select * from `tmp`"), + ImmutableList.of("CREATE TABLE sample STORED AS ORC AS SELECT * FROM tmp", + "CREATE TABLE `sample` STORED AS `ORC` AS select * from `tmp`"), //CTAS query with input and output formats - ImmutableList.of("CREATE TABLE sample STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", - "CREATE TABLE `sample` STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), + ImmutableList.of( + "CREATE TABLE sample STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` STORED AS INPUTFORMAT 
'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), //CTAS query with serde - ImmutableList.of("CREATE TABLE sample ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", - "CREATE TABLE `sample` ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), + ImmutableList.of( + "CREATE TABLE sample ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`"), //CTAS query with row format delimiter fields - ImmutableList.of("CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", - "CREATE TABLE `sample` ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`")); + ImmutableList.of( + "CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM tmp", + "CREATE TABLE `sample` ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM `tmp`")); return convertAndValidateSql.stream().map(x -> new Object[] { x.get(0), x.get(1) }).iterator(); } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index c980b8205..a12f35ea5 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -6,14 +6,13 @@ package com.linkedin.coral.coralservice.utils; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.SqlNode; +import com.linkedin.coral.common.calcite.sql.SqlCommand; import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; import com.linkedin.coral.spark.CoralSpark; import com.linkedin.coral.trino.rel2trino.RelToTrinoConverter; import com.linkedin.coral.trino.trino2rel.TrinoToRelConverter; -import org.apache.calcite.sql.SqlNode; - -import java.util.function.Function; import static com.linkedin.coral.coralservice.utils.CoralProvider.*; @@ -34,7 +33,19 @@ public static String translateHiveToTrino(String query) { public static String translateHiveToSpark(String query) { HiveToRelConverter 
hiveToRelConverter = new HiveToRelConverter(hiveMetastoreClient); SqlNode sqlNode = hiveToRelConverter.toSqlNode(query); - Function hiveSqlNodeToRelConverter = hiveToRelConverter::toRel; - return CoralSpark.create(sqlNode, hiveSqlNodeToRelConverter).getSparkSql(); + if (sqlNode instanceof SqlCommand) { + SqlNode selectNode = ((SqlCommand) sqlNode).getSelectQuery(); + SqlNode selectSparkNode = convertHiveSqlNodeToCoralNode(hiveToRelConverter, selectNode); + ((SqlCommand) sqlNode).setSelectQuery(selectSparkNode); + } else { + sqlNode = convertHiveSqlNodeToCoralNode(hiveToRelConverter, sqlNode); + } + return CoralSpark.constructSparkSQL(sqlNode); + } + + private static SqlNode convertHiveSqlNodeToCoralNode(HiveToRelConverter hiveToRelConverter, SqlNode sqlNode) { + RelNode relNode = hiveToRelConverter.toRel(sqlNode); + SqlNode coralSqlNode = CoralSpark.getCoralSqlNode(relNode); + return coralSqlNode; } } diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index 404443a1f..f3b5a677b 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -9,17 +9,16 @@ import java.util.function.Function; import java.util.stream.Collectors; -import com.linkedin.coral.common.calcite.sql.SqlCommand; -import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import org.apache.avro.Schema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.sql.SqlCreate; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlSelect; +import com.linkedin.coral.common.calcite.sql.SqlCommand; +import com.linkedin.coral.common.calcite.sql.SqlCreateTable; import com.linkedin.coral.spark.containers.SparkRelInfo; import com.linkedin.coral.spark.containers.SparkUDFInfo; import com.linkedin.coral.spark.dialect.SparkSqlDialect; @@ -74,6 +73,22 @@ public static CoralSpark create(RelNode irRelNode) { return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); } + /** + * Users use this function to get CoralSqlNode from CoralRelNode. + * This should be used when the user needs to get CoralSqlNode from CoralRelNode by applying + * Spark-specific transformations on CoralRelNode + * with Coral-schema output schema + * + * @return [[SqlNode]] + */ + public static SqlNode getCoralSqlNode(RelNode irRelNode) { + SparkRelInfo sparkRelInfo = IRRelToSparkRelTransformer.transform(irRelNode); + RelNode sparkRelNode = sparkRelInfo.getSparkRelNode(); + CoralRelToSqlNodeConverter rel2sql = new CoralRelToSqlNodeConverter(); + SqlNode coralSqlNode = rel2sql.convert(sparkRelNode); + return coralSqlNode; + } + /** * Users use this function as the main API for getting CoralSpark instance. * This should be used when user need to align the Coral-spark translated SQL @@ -119,6 +134,24 @@ private static String constructSparkSQL(RelNode sparkRelNode) { return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); } + /** + * This function returns a completely expanded SQL statement in Spark SQL Dialect. + * + * A SQL statement is 'completely expanded' if it doesn't depend + * on (or selects from) Hive views, but instead, just on base tables. 
+ + * Converts CoralSqlNode to Spark SQL + * + * @param sqlNode CoralSqlNode which will be translated to SparkSql + * + * @return SQL String in Spark SQL dialect which is 'completely expanded' + */ + public static String constructSparkSQL(SqlNode sqlNode) { + SqlNode sparkSqlNode = sqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()); + SqlNode rewrittenSparkSqlNode = sparkSqlNode.accept(new SparkSqlRewriter()); + return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); + } + /** * Users use this function as the main API for getting CoralSpark instance. * @@ -136,21 +169,22 @@ private static String constructSparkSQL(RelNode sparkRelNode) { * * @return [[CoralSparkInfo]] */ - public static CoralSpark create(SqlNode sqlNode, Function convertor){ + public static CoralSpark create(SqlNode sqlNode, Function convertor) { SparkRelInfo sparkRelInfo; //apply RelNode transformations for sqlNode eligible for transformation. - if(sqlNode instanceof SqlCommand) { + if (sqlNode instanceof SqlCommand) { SqlNode selectNode = ((SqlCreateTable) sqlNode).getSelectQuery(); sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(selectNode)); selectNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); ((SqlCreateTable) sqlNode).setSelectQuery(selectNode); } else { + sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(sqlNode)); sqlNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode()); } // sqlNode to sparkSQL - String sparkSQL = sqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()) - .accept(new SparkSqlRewriter()).toSqlString(SparkSqlDialect.INSTANCE).getSql(); + String sparkSQL = sqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()).accept(new SparkSqlRewriter()) + .toSqlString(SparkSqlDialect.INSTANCE).getSql(); List baseTables = constructBaseTables(sparkRelInfo.getSparkRelNode()); List sparkUDFInfos = sparkRelInfo.getSparkUDFInfoList(); return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL); From 70276405029c72095e0732f3ec3d6f6b4e89b089 Mon Sep 17 00:00:00 2001 From: Nimesh Khandelwal Date: Tue, 15 Nov 2022 22:22:09 +0530 Subject: [PATCH 10/11] removing unnecessary functions --- .../com/linkedin/coral/spark/CoralSpark.java | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index f3b5a677b..169576adc 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -152,44 +152,6 @@ public static String constructSparkSQL(SqlNode sqlNode) { return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql(); } - /** - * Users use this function as the main API for getting CoralSpark instance. - * - * Internally Appropriate parts of Sql RelNode is converted to Spark RelNode, Spark RelNode is converted back - * to SqlNode and SqlNode to SparkSQL. - * - * It returns an instance of CoralSpark which contains - * 1) Spark SQL - * 2) Base tables - * 3) Spark UDF information objects, ie. List of {@link SparkUDFInfo} - * - * @param sqlNode CoralNode which will be translated to SparkSQL. 
From 70276405029c72095e0732f3ec3d6f6b4e89b089 Mon Sep 17 00:00:00 2001
From: Nimesh Khandelwal
Date: Tue, 15 Nov 2022 22:22:09 +0530
Subject: [PATCH 10/11] removing unnecessary functions

---
 .../com/linkedin/coral/spark/CoralSpark.java | 38 ------------------
 1 file changed, 38 deletions(-)

diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
index f3b5a677b..169576adc 100644
--- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
+++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
@@ -152,44 +152,6 @@ public static String constructSparkSQL(SqlNode sqlNode) {
     return rewrittenSparkSqlNode.toSqlString(SparkSqlDialect.INSTANCE).getSql();
   }
 
-  /**
-   * Users use this function as the main API for getting CoralSpark instance.
-   *
-   * Internally Appropriate parts of Sql RelNode is converted to Spark RelNode, Spark RelNode is converted back
-   * to SqlNode and SqlNode to SparkSQL.
-   *
-   * It returns an instance of CoralSpark which contains
-   * 1) Spark SQL
-   * 2) Base tables
-   * 3) Spark UDF information objects, ie. List of {@link SparkUDFInfo}
-   *
-   * @param sqlNode CoralNode which will be translated to SparkSQL.
-   * @param convertor Functional Interface to convert SqlNode to appropriate RelNode
-   *
-   *
-   * @return [[CoralSparkInfo]]
-   */
-  public static CoralSpark create(SqlNode sqlNode, Function<SqlNode, RelNode> convertor) {
-    SparkRelInfo sparkRelInfo;
-    //apply RelNode transformations for sqlNode eligible for transformation.
-    if (sqlNode instanceof SqlCommand) {
-      SqlNode selectNode = ((SqlCreateTable) sqlNode).getSelectQuery();
-      sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(selectNode));
-      selectNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode());
-      ((SqlCreateTable) sqlNode).setSelectQuery(selectNode);
-    } else {
-
-      sparkRelInfo = IRRelToSparkRelTransformer.transform(convertor.apply(sqlNode));
-      sqlNode = new CoralRelToSqlNodeConverter().convert(sparkRelInfo.getSparkRelNode());
-    }
-    // sqlNode to sparkSQL
-    String sparkSQL = sqlNode.accept(new CoralSqlNodeToSparkSqlNodeConverter()).accept(new SparkSqlRewriter())
-        .toSqlString(SparkSqlDialect.INSTANCE).getSql();
-    List<String> baseTables = constructBaseTables(sparkRelInfo.getSparkRelNode());
-    List<SparkUDFInfo> sparkUDFInfos = sparkRelInfo.getSparkUDFInfoList();
-    return new CoralSpark(baseTables, sparkUDFInfos, sparkSQL);
-  }
-
   private static String constructSparkSQLWithExplicitAlias(RelNode sparkRelNode, List<String> aliases) {
     CoralRelToSqlNodeConverter rel2sql = new CoralRelToSqlNodeConverter();
     // Create temporary objects r and rewritten to make debugging easier
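With the Function-based create overload removed above, callers that previously supplied a SqlNode-to-RelNode convertor compose the surviving helpers instead. A sketch of the equivalent call path for a plain (non-SqlCommand) query, assuming convertor behaves like hiveToRelConverter::toRel (variable names are illustrative, not from this patch):

    // Replacement for the deleted create(SqlNode, Function) overload,
    // shown for the non-SqlCommand branch only.
    Function<SqlNode, RelNode> convertor = hiveToRelConverter::toRel;
    SqlNode coralSqlNode = CoralSpark.getCoralSqlNode(convertor.apply(sqlNode));
    String sparkSql = CoralSpark.constructSparkSQL(coralSqlNode);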
From 4ccd1ced68fd1f63e9540ef0d8b055808fbe032e Mon Sep 17 00:00:00 2001
From: Nimesh Khandelwal
Date: Wed, 16 Nov 2022 18:20:48 +0530
Subject: [PATCH 11/11] add basic uts

---
 .../com/linkedin/coral/spark/CoralSpark.java  |  3 --
 .../linkedin/coral/spark/CoralSparkTest.java  | 47 +++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
index 169576adc..bdae06085 100644
--- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
+++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java
@@ -6,7 +6,6 @@
 package com.linkedin.coral.spark;
 
 import java.util.List;
-import java.util.function.Function;
 import java.util.stream.Collectors;
 
 import org.apache.avro.Schema;
@@ -17,8 +16,6 @@
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlSelect;
 
-import com.linkedin.coral.common.calcite.sql.SqlCommand;
-import com.linkedin.coral.common.calcite.sql.SqlCreateTable;
 import com.linkedin.coral.spark.containers.SparkRelInfo;
 import com.linkedin.coral.spark.containers.SparkUDFInfo;
 import com.linkedin.coral.spark.dialect.SparkSqlDialect;
diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
index b2431f774..c5cde16b5 100644
--- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
+++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
@@ -14,6 +14,7 @@
 import org.apache.avro.Schema;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlTypeFamily;
 import org.apache.calcite.sql.type.SqlTypeName;
@@ -26,6 +27,8 @@
 import org.testng.annotations.Test;
 
 import com.linkedin.coral.com.google.common.collect.ImmutableList;
+import com.linkedin.coral.common.calcite.sql.SqlCommand;
+import com.linkedin.coral.hive.hive2rel.HiveToRelConverter;
 import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry;
 import com.linkedin.coral.spark.containers.SparkUDFInfo;
 import com.linkedin.coral.spark.exceptions.UnsupportedUDFException;
@@ -702,6 +705,32 @@ public void testCastDecimal() {
     assertEquals(CoralSpark.create(relNode).getSparkSql(), targetSql);
   }
 
+  @Test
+  public void testCreateTableAsSelectWithUnionExtractUDF() {
+    String query = "CREATE TABLE foo_bar as SELECT extract_union(foo) from union_table";
+    String targetSql = "CREATE TABLE foo_bar as SELECT coalesce_struct(foo) FROM default.union_table";
+    assertEquals(translateHiveToSpark(query).toLowerCase().replaceAll("\n", " "),
+        targetSql.toLowerCase().replaceAll("\n", " "));
+  }
+
+  @Test
+  public void testCreateTableAsSelect() {
+    String query = "CREATE TABLE foo_bar as SELECT CAST(a AS DECIMAL(10, 0)) casted_decimal FROM default.foo";
+    String targetSql = "CREATE TABLE foo_bar as SELECT CAST(a AS DECIMAL(10, 0)) casted_decimal FROM default.foo";
+    assertEquals(translateHiveToSpark(query).toLowerCase().replaceAll("\n", " "),
+        targetSql.toLowerCase().replaceAll("\n", " "));
+  }
+
+  @Test
+  public void testCreateTableAsSelectWithTableProperties() {
+    String query =
+        "CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM default.foo";
+    String targetSql =
+        "CREATE TABLE sample ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' AS SELECT * FROM default.foo";
+    assertEquals(translateHiveToSpark(query).toLowerCase().replaceAll("\n", " "),
+        targetSql.toLowerCase().replaceAll("\n", " "));
+  }
+
   @Test
   public void testCastDecimalDefault() {
     RelNode relNode = TestUtils.toRelNode("SELECT CAST(a as DECIMAL) as casted_decimal FROM default.foo");
@@ -856,4 +885,22 @@ private static String getCoralSparkTranslatedSqlWithAliasFromCoralSchema(String
     return coralSpark.getSparkSql();
   }
 
+  private static SqlNode convertHiveSqlNodeToCoralNode(HiveToRelConverter hiveToRelConverter, SqlNode sqlNode) {
+    RelNode relNode = hiveToRelConverter.toRel(sqlNode);
+    SqlNode coralSqlNode = CoralSpark.getCoralSqlNode(relNode);
+    return coralSqlNode;
+  }
+
+  private static String translateHiveToSpark(String query) {
+    HiveToRelConverter hiveToRelConverter = TestUtils.hiveToRelConverter;
+    SqlNode sqlNode = hiveToRelConverter.toSqlNode(query);
+    if (sqlNode instanceof SqlCommand) {
+      SqlNode selectNode = ((SqlCommand) sqlNode).getSelectQuery();
+      SqlNode selectSparkNode = convertHiveSqlNodeToCoralNode(hiveToRelConverter, selectNode);
+      ((SqlCommand) sqlNode).setSelectQuery(selectSparkNode);
+    } else {
+      sqlNode = convertHiveSqlNodeToCoralNode(hiveToRelConverter, sqlNode);
+    }
+    return CoralSpark.constructSparkSQL(sqlNode);
+  }
 }
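For reference, the new translateHiveToSpark test helper doubles as an end-to-end recipe for the CTAS path. A minimal caller sketch, assuming a metastore-backed converter such as the TestUtils.hiveToRelConverter fixture and an existing table default.foo (both assumptions from the test setup, not guarantees of this patch):

    // End-to-end CTAS translation mirroring the test helper above.
    HiveToRelConverter converter = TestUtils.hiveToRelConverter; // assumed test fixture
    SqlNode node = converter.toSqlNode("CREATE TABLE foo_bar AS SELECT a FROM default.foo");
    if (node instanceof SqlCommand) {
      // Only the inner SELECT goes through the RelNode pipeline.
      SqlNode select = ((SqlCommand) node).getSelectQuery();
      ((SqlCommand) node).setSelectQuery(CoralSpark.getCoralSqlNode(converter.toRel(select)));
    }
    String sparkSql = CoralSpark.constructSparkSQL(node);
    // Expected output (modulo case and whitespace):
    // CREATE TABLE foo_bar AS SELECT a FROM default.foo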