From a80f59057462a2dd5b2295743a4542ee8084fe66 Mon Sep 17 00:00:00 2001
From: Alice Yeh
Date: Wed, 26 Apr 2023 12:42:24 -0700
Subject: [PATCH 01/21] Setup coral-incremental logic for nested query support

---
 .../IncrementalTransformerResults.java        | 106 ++++++++++++++++++
 .../RelNodeIncrementalTransformer.java        | 101 +++++++++++++++---
 .../RelToIncrementalSqlConverterTest.java     |  47 ++------
 .../linkedin/coral/incremental/TestUtils.java |   4 +
 .../coralservice/utils/IncrementalUtils.java  |   5 +-
 5 files changed, 210 insertions(+), 53 deletions(-)
 create mode 100644 coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java

diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java
new file mode 100644
index 000000000..19a52e00e
--- /dev/null
+++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java
@@ -0,0 +1,106 @@
+/**
+ * Copyright 2023 LinkedIn Corporation. All rights reserved.
+ * Licensed under the BSD-2 Clause license.
+ * See LICENSE in the project root for license information.
+ */
+package com.linkedin.coral.incremental;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.calcite.rel.RelNode;
+
+
+/**
+ * Mutable holder for the RelNodes produced by an incremental transformation: an incremental
+ * RelNode, a refresh RelNode, and a name-keyed map of intermediate query RelNodes.
+ *
+ * <p>Both RelNodes start out {@code null} and the map starts out empty. Not synchronized.
+ */
+public class IncrementalTransformerResults {
+
+  private RelNode incrementalRelNode;
+  private RelNode refreshRelNode;
+  private Map<String, RelNode> intermediateQueryRelNodes;
+
+  /** Creates an empty result with no RelNodes set and no intermediate queries. */
+  public IncrementalTransformerResults() {
+    incrementalRelNode = null;
+    refreshRelNode = null;
+    intermediateQueryRelNodes = new HashMap<>();
+  }
+
+  /** Returns {@code true} if a non-null incremental RelNode has been set. */
+  public boolean existsIncrementalRelNode() {
+    return incrementalRelNode != null;
+  }
+
+  /** Returns the incremental RelNode, or {@code null} if none has been set. */
+  public RelNode getIncrementalRelNode() {
+    return incrementalRelNode;
+  }
+
+  /** Returns {@code true} if a non-null refresh RelNode has been set. */
+  public boolean existsRefreshRelNode() {
+    return refreshRelNode != null;
+  }
+
+  /** Returns the refresh RelNode, or {@code null} if none has been set. */
+  public RelNode getRefreshRelNode() {
+    return refreshRelNode;
+  }
+
+  /**
+   * Returns the backing map of intermediate query RelNodes keyed by name. The map is returned
+   * by reference (not copied), so changes made to it affect this object. Never {@code null}.
+   */
+  public Map<String, RelNode> getIntermediateQueryRelNodes() {
+    return intermediateQueryRelNodes;
+  }
+
+  /** Returns {@code true} if an intermediate query RelNode is stored under {@code name}. */
+  public boolean containsIntermediateQueryRelNodeKey(String name) {
+    return intermediateQueryRelNodes.containsKey(name);
+  }
+
+  /** Returns the intermediate query RelNode stored under {@code name}, or {@code null}. */
+  public RelNode getIntermediateQueryRelNodeCorrespondingToKey(String name) {
+    return intermediateQueryRelNodes.get(name);
+  }
+
+  /** Sets the incremental RelNode; passing {@code null} clears it. */
+  public void setIncrementalRelNode(RelNode incrementalRelNode) {
+    this.incrementalRelNode = incrementalRelNode;
+  }
+
+  /** Sets the refresh RelNode; passing {@code null} clears it. */
+  public void setRefreshRelNode(RelNode refreshRelNode) {
+    this.refreshRelNode = refreshRelNode;
+  }
+
+  /**
+   * Replaces the intermediate query map. The given map is stored by reference, not copied.
+   * A {@code null} argument resets the map to an empty one, so the lookup and add methods of
+   * this class never dereference a null map.
+   */
+  public void setIntermediateQueryRelNodes(Map<String, RelNode> intermediateQueryRelNodes) {
+    if (intermediateQueryRelNodes == null) {
+      this.intermediateQueryRelNodes = new HashMap<>();
+    } else {
+      this.intermediateQueryRelNodes = intermediateQueryRelNodes;
+    }
+  }
+
+  /** Stores {@code intermediateRelNode} under {@code name}, replacing any previous entry. */
+  public void addIntermediateQueryRelNode(String name, RelNode intermediateRelNode) {
+    this.intermediateQueryRelNodes.put(name, intermediateRelNode);
+  }
+
+  /** Adds all entries of the given map; a {@code null} map is ignored. */
+  public void addMultipleIntermediateQueryRelNodes(Map<String, RelNode> intermediateQueryRelNodes) {
+    if (intermediateQueryRelNodes != null) {
+      this.intermediateQueryRelNodes.putAll(intermediateQueryRelNodes);
+    }
+  }
+
+}
diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java
b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 5b59c11d1..1184d8c3f 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -7,10 +7,12 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.prepare.RelOptTableImpl; import org.apache.calcite.rel.RelNode; @@ -29,14 +31,29 @@ public class RelNodeIncrementalTransformer { + private static RelOptSchema relOptSchema; + private RelNodeIncrementalTransformer() { } - public static RelNode convertRelIncremental(RelNode originalNode) { + public static IncrementalTransformerResults performIncrementalTransformation(RelNode originalNode) { + IncrementalTransformerResults incrementalTransformerResults = convertRelIncremental(originalNode); + return incrementalTransformerResults; + } + + private static IncrementalTransformerResults convertRelIncremental(RelNode originalNode) { + IncrementalTransformerResults incrementalTransformerResults = new IncrementalTransformerResults(); RelShuttle converter = new RelShuttleImpl() { @Override public RelNode visit(TableScan scan) { RelOptTable originalTable = scan.getTable(); + + // Set relOptSchema + if (relOptSchema == null) { + relOptSchema = originalTable.getRelOptSchema(); + } + + // Create delta scan List incrementalNames = new ArrayList<>(originalTable.getQualifiedName()); String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + "_delta"; incrementalNames.add(deltaTableName); @@ -49,11 +66,31 @@ public RelNode visit(TableScan scan) { public RelNode visit(LogicalJoin join) { RelNode left = join.getLeft(); 
RelNode right = join.getRight(); - RelNode incrementalLeft = convertRelIncremental(left); - RelNode incrementalRight = convertRelIncremental(right); + IncrementalTransformerResults incrementalTransformerResultsLeft = convertRelIncremental(left); + IncrementalTransformerResults incrementalTransformerResultsRight = convertRelIncremental(right); + RelNode incrementalLeft = incrementalTransformerResultsLeft.getIncrementalRelNode(); + RelNode incrementalRight = incrementalTransformerResultsRight.getIncrementalRelNode(); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsLeft.getIntermediateQueryRelNodes()); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsRight.getIntermediateQueryRelNodes()); RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + // Check if we can replace the left and right nodes with a scan of a materialized table + if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(left.getDescription())) { + LogicalProject leftLastProject = createReplacementProjectNodeForGivenRelNode(left, rexBuilder); + left = leftLastProject; + LogicalProject leftDeltaProject = createReplacementProjectNodeForGivenRelNode(incrementalLeft, rexBuilder); + incrementalLeft = leftDeltaProject; + } + if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(right.getDescription())) { + LogicalProject rightLastProject = createReplacementProjectNodeForGivenRelNode(right, rexBuilder); + right = rightLastProject; + LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode(incrementalRight, rexBuilder); + incrementalRight = rightDeltaProject; + } + LogicalProject p1 = createProjectOverJoin(join, left, incrementalRight, rexBuilder); LogicalProject p2 = createProjectOverJoin(join, incrementalLeft, right, rexBuilder); LogicalProject p3 = createProjectOverJoin(join, incrementalLeft, incrementalRight, rexBuilder); @@ -65,45 +102,77 @@ 
public RelNode visit(LogicalJoin join) { @Override public RelNode visit(LogicalFilter filter) { - RelNode transformedChild = convertRelIncremental(filter.getInput()); + IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(filter.getInput()); + RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); return LogicalFilter.create(transformedChild, filter.getCondition()); } @Override public RelNode visit(LogicalProject project) { - RelNode transformedChild = convertRelIncremental(project.getInput()); - return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(project.getInput()); + RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); + incrementalTransformerResults.addIntermediateQueryRelNode(project.getDescription(), project); + LogicalProject transformedProject = + LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + incrementalTransformerResults.addIntermediateQueryRelNode(transformedProject.getDescription(), + transformedProject); + return transformedProject; } @Override public RelNode visit(LogicalUnion union) { List children = union.getInputs(); - List transformedChildren = + List incrementalTransformerResultsChildren = children.stream().map(child -> convertRelIncremental(child)).collect(Collectors.toList()); + List transformedChildren = new ArrayList<>(); + for (IncrementalTransformerResults incrementalTransformerResultsChild : incrementalTransformerResultsChildren) { + 
transformedChildren.add(incrementalTransformerResultsChild.getIncrementalRelNode()); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); + } return LogicalUnion.create(transformedChildren, union.all); } @Override public RelNode visit(LogicalAggregate aggregate) { - RelNode transformedChild = convertRelIncremental(aggregate.getInput()); + IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(aggregate.getInput()); + RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); + incrementalTransformerResults + .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), aggregate.getAggCallList()); } }; - return originalNode.accept(converter); + incrementalTransformerResults.setIncrementalRelNode(originalNode.accept(converter)); + return incrementalTransformerResults; + } + + private static LogicalProject createReplacementProjectNodeForGivenRelNode(RelNode relNode, RexBuilder rexBuilder) { + RelOptTable table = RelOptTableImpl.create(relOptSchema, relNode.getRowType(), + Collections.singletonList(relNode.getDescription()), null); + TableScan scan = LogicalTableScan.create(relNode.getCluster(), table); + return createProjectOverNode(scan, rexBuilder); + } + + private static LogicalProject createProjectOverNode(RelNode relNode, RexBuilder rexBuilder) { + ArrayList projects = new ArrayList<>(); + ArrayList names = new ArrayList<>(); + IntStream.range(0, relNode.getRowType().getFieldList().size()).forEach(i -> { + projects.add(rexBuilder.makeInputRef(relNode, i)); + names.add(relNode.getRowType().getFieldNames().get(i)); + }); + return LogicalProject.create(relNode, projects, names); } private static LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right, 
RexBuilder rexBuilder) { LogicalJoin incrementalJoin = LogicalJoin.create(left, right, join.getCondition(), join.getVariablesSet(), join.getJoinType()); - ArrayList projects = new ArrayList<>(); - ArrayList names = new ArrayList<>(); - IntStream.range(0, incrementalJoin.getRowType().getFieldList().size()).forEach(i -> { - projects.add(rexBuilder.makeInputRef(incrementalJoin, i)); - names.add(incrementalJoin.getRowType().getFieldNames().get(i)); - }); - return LogicalProject.create(incrementalJoin, projects, names); + return createProjectOverNode(incrementalJoin, rexBuilder); } } diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 3ac0cd683..1b844b42b 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -41,7 +41,9 @@ public void afterClass() throws IOException { } public String convert(RelNode relNode) { - RelNode incrementalRelNode = RelNodeIncrementalTransformer.convertRelIncremental(relNode); + IncrementalTransformerResults incrementalTransformerResults = + RelNodeIncrementalTransformer.performIncrementalTransformation(relNode); + RelNode incrementalRelNode = incrementalTransformerResults.getIncrementalRelNode(); CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); SqlNode sqlNode = converter.convert(incrementalRelNode); return sqlNode.toSqlString(converter.INSTANCE).getSql(); @@ -81,41 +83,6 @@ public void testJoinWithFilter() { assertEquals(getIncrementalModification(sql), expected); } - @Test - public void testJoinWithNestedFilter() { - String sql = - "WITH tmp AS (SELECT * from test.bar1 WHERE test.bar1.x > 10), tmp2 AS (SELECT * from test.bar2) SELECT * FROM tmp JOIN tmp2 ON tmp.x = tmp2.x"; - String 
expected = "SELECT *\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar1\n" - + "WHERE bar1.x > 10) AS t\n" + "INNER JOIN test.bar2_delta AS bar2_delta ON t.x = bar2_delta.x\n" - + "UNION ALL\n" + "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1_delta AS bar1_delta\n" - + "WHERE bar1_delta.x > 10) AS t0\n" + "INNER JOIN test.bar2 AS bar2 ON t0.x = bar2.x) AS t1\n" + "UNION ALL\n" - + "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1_delta AS bar1_delta0\n" - + "WHERE bar1_delta0.x > 10) AS t2\n" + "INNER JOIN test.bar2_delta AS bar2_delta0 ON t2.x = bar2_delta0.x"; - assertEquals(getIncrementalModification(sql), expected); - } - - @Test - public void testNestedJoin() { - String sql = - "WITH tmp AS (SELECT * FROM test.bar1 INNER JOIN test.bar2 ON test.bar1.x = test.bar2.x) SELECT * FROM tmp INNER JOIN test.bar3 ON tmp.x = test.bar3.x"; - String expected = "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar1\n" - + "INNER JOIN test.bar2 AS bar2 ON bar1.x = bar2.x\n" - + "INNER JOIN test.bar3_delta AS bar3_delta ON bar1.x = bar3_delta.x\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar10\n" - + "INNER JOIN test.bar2_delta AS bar2_delta ON bar10.x = bar2_delta.x\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM test.bar1_delta AS bar1_delta\n" + "INNER JOIN test.bar2 AS bar20 ON bar1_delta.x = bar20.x) AS t\n" - + "UNION ALL\n" + "SELECT *\n" + "FROM test.bar1_delta AS bar1_delta0\n" - + "INNER JOIN test.bar2_delta AS bar2_delta0 ON bar1_delta0.x = bar2_delta0.x) AS t0\n" - + "INNER JOIN test.bar3 AS bar3 ON t0.x = bar3.x) AS t1\n" + "UNION ALL\n" + "SELECT *\n" + "FROM (SELECT *\n" - + "FROM (SELECT *\n" + "FROM test.bar1 AS bar11\n" - + "INNER JOIN test.bar2_delta AS bar2_delta1 ON bar11.x = bar2_delta1.x\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM test.bar1_delta AS bar1_delta1\n" + "INNER JOIN test.bar2 AS bar21 ON bar1_delta1.x = bar21.x) AS t2\n" - + "UNION ALL\n" + "SELECT *\n" + 
"FROM test.bar1_delta AS bar1_delta2\n" - + "INNER JOIN test.bar2_delta AS bar2_delta2 ON bar1_delta2.x = bar2_delta2.x) AS t3\n" - + "INNER JOIN test.bar3_delta AS bar3_delta0 ON t3.x = bar3_delta0.x"; - assertEquals(getIncrementalModification(sql), expected); - } - @Test public void testUnion() { String sql = "SELECT * FROM test.bar1 UNION SELECT * FROM test.bar2 UNION SELECT * FROM test.bar3"; @@ -143,4 +110,12 @@ public void testSelectSpecificJoin() { + "INNER JOIN test.bar2_delta AS bar2_delta0 ON bar1_delta0.x = bar2_delta0.x) AS t0"; assertEquals(getIncrementalModification(sql), expected); } + + @Test + public void testJoinOverJoin() { + // Debugging only + String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; + getIncrementalModification(sql); + } } diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java index 232705ed4..7496126c4 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java @@ -60,6 +60,10 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce run(driver, "CREATE TABLE IF NOT EXISTS test.bar1(x int, y double)"); run(driver, "CREATE TABLE IF NOT EXISTS test.bar2(x int, y double)"); run(driver, "CREATE TABLE IF NOT EXISTS test.bar3(x int, y double)"); + + run(driver, "CREATE TABLE IF NOT EXISTS test.alpha(a1 int, a2 double)"); + run(driver, "CREATE TABLE IF NOT EXISTS test.beta(b1 int, b2 double)"); + run(driver, "CREATE TABLE IF NOT EXISTS test.gamma(g1 int, g2 double)"); } public static HiveConf loadResourceHiveConf() { diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java 
b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java index 33fbc9023..61cb3eb44 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java @@ -8,6 +8,7 @@ import org.apache.calcite.rel.RelNode; import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; +import com.linkedin.coral.incremental.IncrementalTransformerResults; import com.linkedin.coral.incremental.RelNodeIncrementalTransformer; import com.linkedin.coral.spark.CoralSpark; @@ -18,7 +19,9 @@ public class IncrementalUtils { public static String getSparkIncrementalQueryFromUserSql(String query) { RelNode originalNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); - RelNode incrementalRelNode = RelNodeIncrementalTransformer.convertRelIncremental(originalNode); + IncrementalTransformerResults incrementalTransformerResults = + RelNodeIncrementalTransformer.performIncrementalTransformation(originalNode); + RelNode incrementalRelNode = incrementalTransformerResults.getIncrementalRelNode(); CoralSpark coralSpark = CoralSpark.create(incrementalRelNode); return coralSpark.getSparkSql(); } From 69fef78552a0e9ab8c95f48453bacf46e93ce001 Mon Sep 17 00:00:00 2001 From: Alice Yeh Date: Wed, 26 Apr 2023 15:01:10 -0700 Subject: [PATCH 02/21] Clarify replaced temp view last and delta pairs --- .../RelNodeIncrementalTransformer.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 1184d8c3f..7f44d7c33 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ 
b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -79,15 +79,19 @@ public RelNode visit(LogicalJoin join) { // Check if we can replace the left and right nodes with a scan of a materialized table if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(left.getDescription())) { - LogicalProject leftLastProject = createReplacementProjectNodeForGivenRelNode(left, rexBuilder); + String description = left.getDescription(); + LogicalProject leftLastProject = createReplacementProjectNodeForGivenRelNode(description, left, rexBuilder); left = leftLastProject; - LogicalProject leftDeltaProject = createReplacementProjectNodeForGivenRelNode(incrementalLeft, rexBuilder); + LogicalProject leftDeltaProject = + createReplacementProjectNodeForGivenRelNode(description + "_delta", incrementalLeft, rexBuilder); incrementalLeft = leftDeltaProject; } if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(right.getDescription())) { - LogicalProject rightLastProject = createReplacementProjectNodeForGivenRelNode(right, rexBuilder); + LogicalProject rightLastProject = + createReplacementProjectNodeForGivenRelNode(right.getDescription(), right, rexBuilder); right = rightLastProject; - LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode(incrementalRight, rexBuilder); + LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode( + right.getDescription() + "_delta", incrementalRight, rexBuilder); incrementalRight = rightDeltaProject; } @@ -118,7 +122,7 @@ public RelNode visit(LogicalProject project) { incrementalTransformerResults.addIntermediateQueryRelNode(project.getDescription(), project); LogicalProject transformedProject = LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); - incrementalTransformerResults.addIntermediateQueryRelNode(transformedProject.getDescription(), + 
incrementalTransformerResults.addIntermediateQueryRelNode(project.getDescription() + "_delta", transformedProject); return transformedProject; } @@ -151,9 +155,10 @@ public RelNode visit(LogicalAggregate aggregate) { return incrementalTransformerResults; } - private static LogicalProject createReplacementProjectNodeForGivenRelNode(RelNode relNode, RexBuilder rexBuilder) { - RelOptTable table = RelOptTableImpl.create(relOptSchema, relNode.getRowType(), - Collections.singletonList(relNode.getDescription()), null); + private static LogicalProject createReplacementProjectNodeForGivenRelNode(String relOptTableName, RelNode relNode, + RexBuilder rexBuilder) { + RelOptTable table = + RelOptTableImpl.create(relOptSchema, relNode.getRowType(), Collections.singletonList(relOptTableName), null); TableScan scan = LogicalTableScan.create(relNode.getCluster(), table); return createProjectOverNode(scan, rexBuilder); } From 8289b6bc6799fc242a94c9f6221a9ac3c35bd48b Mon Sep 17 00:00:00 2001 From: Alice Yeh Date: Wed, 26 Apr 2023 15:53:47 -0700 Subject: [PATCH 03/21] Update nested join test case --- .../RelNodeIncrementalTransformer.java | 26 ++++++++++++------- .../RelToIncrementalSqlConverterTest.java | 9 +++++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 7f44d7c33..d62702fdc 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -78,20 +78,22 @@ public RelNode visit(LogicalJoin join) { RexBuilder rexBuilder = join.getCluster().getRexBuilder(); // Check if we can replace the left and right nodes with a scan of a materialized table - if 
(incrementalTransformerResults.containsIntermediateQueryRelNodeKey(left.getDescription())) { - String description = left.getDescription(); + if (incrementalTransformerResults + .containsIntermediateQueryRelNodeKey(getTableNameFromProjectDescription(left))) { + String description = getTableNameFromProjectDescription(left); LogicalProject leftLastProject = createReplacementProjectNodeForGivenRelNode(description, left, rexBuilder); left = leftLastProject; LogicalProject leftDeltaProject = createReplacementProjectNodeForGivenRelNode(description + "_delta", incrementalLeft, rexBuilder); incrementalLeft = leftDeltaProject; } - if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(right.getDescription())) { - LogicalProject rightLastProject = - createReplacementProjectNodeForGivenRelNode(right.getDescription(), right, rexBuilder); + if (incrementalTransformerResults + .containsIntermediateQueryRelNodeKey(getTableNameFromProjectDescription(right))) { + String description = getTableNameFromProjectDescription(right); + LogicalProject rightLastProject = createReplacementProjectNodeForGivenRelNode(description, right, rexBuilder); right = rightLastProject; - LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode( - right.getDescription() + "_delta", incrementalRight, rexBuilder); + LogicalProject rightDeltaProject = + createReplacementProjectNodeForGivenRelNode(description + "_delta", incrementalRight, rexBuilder); incrementalRight = rightDeltaProject; } @@ -119,11 +121,11 @@ public RelNode visit(LogicalProject project) { RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); incrementalTransformerResults .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); - incrementalTransformerResults.addIntermediateQueryRelNode(project.getDescription(), project); + incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromProjectDescription(project), 
project); LogicalProject transformedProject = LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); - incrementalTransformerResults.addIntermediateQueryRelNode(project.getDescription() + "_delta", - transformedProject); + incrementalTransformerResults + .addIntermediateQueryRelNode(getTableNameFromProjectDescription(project) + "_delta", transformedProject); return transformedProject; } @@ -155,6 +157,10 @@ public RelNode visit(LogicalAggregate aggregate) { return incrementalTransformerResults; } + private static String getTableNameFromProjectDescription(RelNode relNode) { + return relNode.getDescription().replaceAll("LogicalProject", "Table"); + } + private static LogicalProject createReplacementProjectNodeForGivenRelNode(String relOptTableName, RelNode relNode, RexBuilder rexBuilder) { RelOptTable table = diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 1b844b42b..164daf17b 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -113,9 +113,14 @@ public void testSelectSpecificJoin() { @Test public void testJoinOverJoin() { - // Debugging only String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; - getIncrementalModification(sql); + String expected = "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#4 AS Table#4\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#4.a2 = gamma_delta.g2\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#4_delta AS Table#4_delta\n" + + "INNER JOIN test.gamma AS gamma ON 
Table#4_delta.a2 = gamma.g2) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#4_delta AS Table#4_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#4_delta0.a2 = gamma_delta0.g2) AS t0"; + assertEquals(getIncrementalModification(sql), expected); } } From 88fa70dab242a4e86070cd38c03906d7d6ce5d47 Mon Sep 17 00:00:00 2001 From: Alice Yeh Date: Wed, 26 Apr 2023 18:21:00 -0700 Subject: [PATCH 04/21] Set deterministic naming scheme for intermediate tables --- .../IncrementalTransformerResults.java | 30 ++++++++++---- .../RelNodeIncrementalTransformer.java | 40 +++++++++++-------- .../RelToIncrementalSqlConverterTest.java | 28 +++++++++---- .../linkedin/coral/incremental/TestUtils.java | 1 + 4 files changed, 67 insertions(+), 32 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java index 19a52e00e..611a14f4e 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java @@ -5,7 +5,9 @@ */ package com.linkedin.coral.incremental; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.apache.calcite.rel.RelNode; @@ -16,11 +18,13 @@ public class IncrementalTransformerResults { private RelNode incrementalRelNode; private RelNode refreshRelNode; private Map intermediateQueryRelNodes; + private List intermediateOrderings; public IncrementalTransformerResults() { incrementalRelNode = null; refreshRelNode = null; - intermediateQueryRelNodes = new HashMap<>(); + intermediateQueryRelNodes = new LinkedHashMap<>(); + intermediateOrderings = new ArrayList<>(); } public boolean existsIncrementalRelNode() { @@ -47,8 +51,12 @@ public boolean 
containsIntermediateQueryRelNodeKey(String name) { return intermediateQueryRelNodes.containsKey(name); } - public RelNode getIntermediateQueryRelNodeCorrespondingToKey(String name) { - return intermediateQueryRelNodes.get(name); + public List getIntermediateOrderings() { + return intermediateOrderings; + } + + public int getIndexOfIntermediateOrdering(String name) { + return intermediateOrderings.indexOf(name); } public void setIncrementalRelNode(RelNode incrementalRelNode) { @@ -59,18 +67,24 @@ public void setRefreshRelNode(RelNode refreshRelNode) { this.refreshRelNode = refreshRelNode; } - public void setIntermediateQueryRelNodes(Map intermediateQueryRelNodes) { - this.intermediateQueryRelNodes = intermediateQueryRelNodes; - } - public void addIntermediateQueryRelNode(String name, RelNode intermediateRelNode) { this.intermediateQueryRelNodes.put(name, intermediateRelNode); + addIntermediateOrdering(name); } public void addMultipleIntermediateQueryRelNodes(Map intermediateQueryRelNodes) { if (intermediateQueryRelNodes != null) { this.intermediateQueryRelNodes.putAll(intermediateQueryRelNodes); + addMultipleIntermediateOrderings(new ArrayList<>(intermediateQueryRelNodes.keySet())); } } + public void addIntermediateOrdering(String intermediateOrdering) { + this.intermediateOrderings.add(intermediateOrdering); + } + + public void addMultipleIntermediateOrderings(List intermediateOrderings) { + this.intermediateOrderings.addAll(intermediateOrderings); + } + } diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index d62702fdc..e0eb9b1ab 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -78,22 +78,26 @@ public RelNode visit(LogicalJoin join) { 
RexBuilder rexBuilder = join.getCluster().getRexBuilder(); // Check if we can replace the left and right nodes with a scan of a materialized table - if (incrementalTransformerResults - .containsIntermediateQueryRelNodeKey(getTableNameFromProjectDescription(left))) { - String description = getTableNameFromProjectDescription(left); - LogicalProject leftLastProject = createReplacementProjectNodeForGivenRelNode(description, left, rexBuilder); + if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(getTableNameFromDescription(left))) { + String description = getTableNameFromDescription(left); + String deterministicDescription = + "Table#" + incrementalTransformerResults.getIndexOfIntermediateOrdering(description); + LogicalProject leftLastProject = + createReplacementProjectNodeForGivenRelNode(deterministicDescription, left, rexBuilder); left = leftLastProject; - LogicalProject leftDeltaProject = - createReplacementProjectNodeForGivenRelNode(description + "_delta", incrementalLeft, rexBuilder); + LogicalProject leftDeltaProject = createReplacementProjectNodeForGivenRelNode( + deterministicDescription + "_delta", incrementalLeft, rexBuilder); incrementalLeft = leftDeltaProject; } - if (incrementalTransformerResults - .containsIntermediateQueryRelNodeKey(getTableNameFromProjectDescription(right))) { - String description = getTableNameFromProjectDescription(right); - LogicalProject rightLastProject = createReplacementProjectNodeForGivenRelNode(description, right, rexBuilder); + if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(getTableNameFromDescription(right))) { + String description = getTableNameFromDescription(right); + String deterministicDescription = + "Table#" + incrementalTransformerResults.getIndexOfIntermediateOrdering(description); + LogicalProject rightLastProject = + createReplacementProjectNodeForGivenRelNode(deterministicDescription, right, rexBuilder); right = rightLastProject; - LogicalProject rightDeltaProject = - 
createReplacementProjectNodeForGivenRelNode(description + "_delta", incrementalRight, rexBuilder); + LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode( + deterministicDescription + "_delta", incrementalRight, rexBuilder); incrementalRight = rightDeltaProject; } @@ -103,6 +107,7 @@ public RelNode visit(LogicalJoin join) { LogicalUnion unionAllJoins = LogicalUnion.create(Arrays.asList(LogicalUnion.create(Arrays.asList(p1, p2), true), p3), true); + return unionAllJoins; } @@ -121,11 +126,11 @@ public RelNode visit(LogicalProject project) { RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); incrementalTransformerResults .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); - incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromProjectDescription(project), project); + incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromDescription(project), project); LogicalProject transformedProject = LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); - incrementalTransformerResults - .addIntermediateQueryRelNode(getTableNameFromProjectDescription(project) + "_delta", transformedProject); + incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromDescription(project) + "_delta", + transformedProject); return transformedProject; } @@ -157,8 +162,9 @@ public RelNode visit(LogicalAggregate aggregate) { return incrementalTransformerResults; } - private static String getTableNameFromProjectDescription(RelNode relNode) { - return relNode.getDescription().replaceAll("LogicalProject", "Table"); + private static String getTableNameFromDescription(RelNode relNode) { + String identifier = relNode.getDescription().split("#")[1]; + return "Table#" + identifier; } private static LogicalProject createReplacementProjectNodeForGivenRelNode(String relOptTableName, RelNode relNode, diff --git 
a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 164daf17b..0033dc858 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -112,15 +112,29 @@ public void testSelectSpecificJoin() { } @Test - public void testJoinOverJoin() { + public void testNestedJoin() { String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; - String expected = "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#4 AS Table#4\n" - + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#4.a2 = gamma_delta.g2\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM Table#4_delta AS Table#4_delta\n" - + "INNER JOIN test.gamma AS gamma ON Table#4_delta.a2 = gamma.g2) AS t\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM Table#4_delta AS Table#4_delta0\n" - + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#4_delta0.a2 = gamma_delta0.g2) AS t0"; + String expected = "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#0.a2 = gamma_delta.g2\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma AS gamma ON Table#0_delta.a2 = gamma.g2) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0"; + assertEquals(getIncrementalModification(sql), expected); + } + + @Test + public void testThreeNestedJoins() { + String nestedJoin1 = "SELECT a1, a2 FROM 
test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String nestedJoin2 = "SELECT a2, g1 FROM (" + nestedJoin1 + ") AS nj1 JOIN test.gamma ON nj1.a2 = test.gamma.g2"; + String sql = "SELECT g1, e2 FROM (" + nestedJoin2 + ") AS nj2 JOIN test.epsilon ON nj2.g1 = test.epsilon.e1"; + String expected = "SELECT t0.g1, t0.e2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#2 AS Table#2\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta ON Table#2.g1 = epsilon_delta.e1\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#2_delta AS Table#2_delta\n" + + "INNER JOIN test.epsilon AS epsilon ON Table#2_delta.g1 = epsilon.e1) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#2_delta AS Table#2_delta0\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#2_delta0.g1 = epsilon_delta0.e1) AS t0"; assertEquals(getIncrementalModification(sql), expected); } } diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java index 7496126c4..7384be9a8 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java @@ -64,6 +64,7 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce run(driver, "CREATE TABLE IF NOT EXISTS test.alpha(a1 int, a2 double)"); run(driver, "CREATE TABLE IF NOT EXISTS test.beta(b1 int, b2 double)"); run(driver, "CREATE TABLE IF NOT EXISTS test.gamma(g1 int, g2 double)"); + run(driver, "CREATE TABLE IF NOT EXISTS test.epsilon(e1 int, e2 double)"); } public static HiveConf loadResourceHiveConf() { From c3caa1aa0f0fd2e3ff0232a8a20248c560803233 Mon Sep 17 00:00:00 2001 From: Alice Yeh Date: Thu, 27 Apr 2023 18:12:20 -0700 Subject: [PATCH 05/21] Refactor logic to be self-contained in transformer and update tests --- .../IncrementalTransformerResults.java | 90 -------- 
.../RelNodeIncrementalTransformer.java | 213 +++++++++++------- .../RelToIncrementalSqlConverterTest.java | 94 ++++++-- .../coralservice/utils/IncrementalUtils.java | 6 +- 4 files changed, 213 insertions(+), 190 deletions(-) delete mode 100644 coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java deleted file mode 100644 index 611a14f4e..000000000 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/IncrementalTransformerResults.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Copyright 2023 LinkedIn Corporation. All rights reserved. - * Licensed under the BSD-2 Clause license. - * See LICENSE in the project root for license information. - */ -package com.linkedin.coral.incremental; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import org.apache.calcite.rel.RelNode; - - -public class IncrementalTransformerResults { - - private RelNode incrementalRelNode; - private RelNode refreshRelNode; - private Map intermediateQueryRelNodes; - private List intermediateOrderings; - - public IncrementalTransformerResults() { - incrementalRelNode = null; - refreshRelNode = null; - intermediateQueryRelNodes = new LinkedHashMap<>(); - intermediateOrderings = new ArrayList<>(); - } - - public boolean existsIncrementalRelNode() { - return incrementalRelNode != null; - } - - public RelNode getIncrementalRelNode() { - return incrementalRelNode; - } - - public boolean existsRefreshRelNode() { - return refreshRelNode != null; - } - - public RelNode getRefreshRelNode() { - return refreshRelNode; - } - - public Map getIntermediateQueryRelNodes() { - return intermediateQueryRelNodes; - } - - public boolean containsIntermediateQueryRelNodeKey(String name) { - 
return intermediateQueryRelNodes.containsKey(name); - } - - public List getIntermediateOrderings() { - return intermediateOrderings; - } - - public int getIndexOfIntermediateOrdering(String name) { - return intermediateOrderings.indexOf(name); - } - - public void setIncrementalRelNode(RelNode incrementalRelNode) { - this.incrementalRelNode = incrementalRelNode; - } - - public void setRefreshRelNode(RelNode refreshRelNode) { - this.refreshRelNode = refreshRelNode; - } - - public void addIntermediateQueryRelNode(String name, RelNode intermediateRelNode) { - this.intermediateQueryRelNodes.put(name, intermediateRelNode); - addIntermediateOrdering(name); - } - - public void addMultipleIntermediateQueryRelNodes(Map intermediateQueryRelNodes) { - if (intermediateQueryRelNodes != null) { - this.intermediateQueryRelNodes.putAll(intermediateQueryRelNodes); - addMultipleIntermediateOrderings(new ArrayList<>(intermediateQueryRelNodes.keySet())); - } - } - - public void addIntermediateOrdering(String intermediateOrdering) { - this.intermediateOrderings.add(intermediateOrdering); - } - - public void addMultipleIntermediateOrderings(List intermediateOrderings) { - this.intermediateOrderings.addAll(intermediateOrderings); - } - -} diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index e0eb9b1ab..dce497d10 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -8,7 +8,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -31,31 +33,63 @@ public class RelNodeIncrementalTransformer 
{ - private static RelOptSchema relOptSchema; + private final String TABLE_NAME_PREFIX = "Table#"; + private final String DELTA_SUFFIX = "_delta"; - private RelNodeIncrementalTransformer() { + private RelOptSchema relOptSchema; + private Map snapshotRelNodes; + private Map deltaRelNodes; + private RelNode tempLastRelNode; + + public RelNodeIncrementalTransformer() { + relOptSchema = null; + snapshotRelNodes = new LinkedHashMap<>(); + deltaRelNodes = new LinkedHashMap<>(); + tempLastRelNode = null; + } + + /** + * Returns snapshotRelNodes with deterministic keys. + */ + public Map getSnapshotRelNodes() { + Map deterministicSnapshotRelNodes = new LinkedHashMap<>(); + for (String description : snapshotRelNodes.keySet()) { + deterministicSnapshotRelNodes.put(getDeterministicDescriptionFromDescription(description, false), + snapshotRelNodes.get(description)); + } + return deterministicSnapshotRelNodes; } - public static IncrementalTransformerResults performIncrementalTransformation(RelNode originalNode) { - IncrementalTransformerResults incrementalTransformerResults = convertRelIncremental(originalNode); - return incrementalTransformerResults; + /** + * Returns deltaRelNodes with deterministic keys. + */ + public Map getDeltaRelNodes() { + Map deterministicDeltaRelNodes = new LinkedHashMap<>(); + for (String description : deltaRelNodes.keySet()) { + deterministicDeltaRelNodes.put(getDeterministicDescriptionFromDescription(description, true), + deltaRelNodes.get(description)); + } + return deterministicDeltaRelNodes; } - private static IncrementalTransformerResults convertRelIncremental(RelNode originalNode) { - IncrementalTransformerResults incrementalTransformerResults = new IncrementalTransformerResults(); + /** + * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes. + * @param originalNode input RelNode to generate an incremental version for. 
+ */ + public RelNode convertRelIncremental(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @Override public RelNode visit(TableScan scan) { RelOptTable originalTable = scan.getTable(); - // Set relOptSchema + // Set RelNodeIncrementalTransformer class relOptSchema if not already set if (relOptSchema == null) { relOptSchema = originalTable.getRelOptSchema(); } // Create delta scan List incrementalNames = new ArrayList<>(originalTable.getQualifiedName()); - String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + "_delta"; + String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + DELTA_SUFFIX; incrementalNames.add(deltaTableName); RelOptTable incrementalTable = RelOptTableImpl.create(originalTable.getRelOptSchema(), originalTable.getRowType(), incrementalNames, null); @@ -66,41 +100,32 @@ public RelNode visit(TableScan scan) { public RelNode visit(LogicalJoin join) { RelNode left = join.getLeft(); RelNode right = join.getRight(); - IncrementalTransformerResults incrementalTransformerResultsLeft = convertRelIncremental(left); - IncrementalTransformerResults incrementalTransformerResultsRight = convertRelIncremental(right); - RelNode incrementalLeft = incrementalTransformerResultsLeft.getIncrementalRelNode(); - RelNode incrementalRight = incrementalTransformerResultsRight.getIncrementalRelNode(); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsLeft.getIntermediateQueryRelNodes()); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsRight.getIntermediateQueryRelNodes()); + RelNode incrementalLeft = convertRelIncremental(left); + RelNode incrementalRight = convertRelIncremental(right); RexBuilder rexBuilder = join.getCluster().getRexBuilder(); // Check if we can replace the left and right nodes with a scan of a materialized table - if 
(incrementalTransformerResults.containsIntermediateQueryRelNodeKey(getTableNameFromDescription(left))) { - String description = getTableNameFromDescription(left); - String deterministicDescription = - "Table#" + incrementalTransformerResults.getIndexOfIntermediateOrdering(description); - LogicalProject leftLastProject = - createReplacementProjectNodeForGivenRelNode(deterministicDescription, left, rexBuilder); - left = leftLastProject; - LogicalProject leftDeltaProject = createReplacementProjectNodeForGivenRelNode( - deterministicDescription + "_delta", incrementalLeft, rexBuilder); - incrementalLeft = leftDeltaProject; + String leftDescription = getDescriptionFromRelNode(left, false); + String leftIncrementalDescription = getDescriptionFromRelNode(left, true); + if (snapshotRelNodes.containsKey(leftDescription)) { + left = + createTableScanForGivenRelNode(getDeterministicDescriptionFromDescription(leftDescription, false), left); + incrementalLeft = createTableScanForGivenRelNode( + getDeterministicDescriptionFromDescription(leftIncrementalDescription, true), incrementalLeft); } - if (incrementalTransformerResults.containsIntermediateQueryRelNodeKey(getTableNameFromDescription(right))) { - String description = getTableNameFromDescription(right); - String deterministicDescription = - "Table#" + incrementalTransformerResults.getIndexOfIntermediateOrdering(description); - LogicalProject rightLastProject = - createReplacementProjectNodeForGivenRelNode(deterministicDescription, right, rexBuilder); - right = rightLastProject; - LogicalProject rightDeltaProject = createReplacementProjectNodeForGivenRelNode( - deterministicDescription + "_delta", incrementalRight, rexBuilder); - incrementalRight = rightDeltaProject; + String rightDescription = getDescriptionFromRelNode(right, false); + String rightIncrementalDescription = getDescriptionFromRelNode(right, true); + if (snapshotRelNodes.containsKey(rightDescription)) { + right = 
createTableScanForGivenRelNode(getDeterministicDescriptionFromDescription(rightDescription, false), + right); + incrementalRight = createTableScanForGivenRelNode( + getDeterministicDescriptionFromDescription(rightIncrementalDescription, true), incrementalRight); } + // We need to do this in the join to get potentially updated left and right nodes + tempLastRelNode = createProjectOverJoin(join, left, right, rexBuilder); + LogicalProject p1 = createProjectOverJoin(join, left, incrementalRight, rexBuilder); LogicalProject p2 = createProjectOverJoin(join, incrementalLeft, right, rexBuilder); LogicalProject p3 = createProjectOverJoin(join, incrementalLeft, incrementalRight, rexBuilder); @@ -113,83 +138,113 @@ public RelNode visit(LogicalJoin join) { @Override public RelNode visit(LogicalFilter filter) { - IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(filter.getInput()); - RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); + RelNode transformedChild = convertRelIncremental(filter.getInput()); return LogicalFilter.create(transformedChild, filter.getCondition()); } @Override public RelNode visit(LogicalProject project) { - IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(project.getInput()); - RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); - incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromDescription(project), project); + RelNode transformedChild = convertRelIncremental(project.getInput()); + RelNode materializedProject = getTempLastRelNode(); + if (materializedProject != null) { + 
snapshotRelNodes.put(getDescriptionFromRelNode(project, false), materializedProject); + } else { + snapshotRelNodes.put(getDescriptionFromRelNode(project, false), project); + } LogicalProject transformedProject = LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); - incrementalTransformerResults.addIntermediateQueryRelNode(getTableNameFromDescription(project) + "_delta", - transformedProject); + deltaRelNodes.put(getDescriptionFromRelNode(project, true), transformedProject); return transformedProject; } @Override public RelNode visit(LogicalUnion union) { List children = union.getInputs(); - List incrementalTransformerResultsChildren = + List transformedChildren = children.stream().map(child -> convertRelIncremental(child)).collect(Collectors.toList()); - List transformedChildren = new ArrayList<>(); - for (IncrementalTransformerResults incrementalTransformerResultsChild : incrementalTransformerResultsChildren) { - transformedChildren.add(incrementalTransformerResultsChild.getIncrementalRelNode()); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); - } return LogicalUnion.create(transformedChildren, union.all); } @Override public RelNode visit(LogicalAggregate aggregate) { - IncrementalTransformerResults incrementalTransformerResultsChild = convertRelIncremental(aggregate.getInput()); - RelNode transformedChild = incrementalTransformerResultsChild.getIncrementalRelNode(); - incrementalTransformerResults - .addMultipleIntermediateQueryRelNodes(incrementalTransformerResultsChild.getIntermediateQueryRelNodes()); + RelNode transformedChild = convertRelIncremental(aggregate.getInput()); return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), aggregate.getAggCallList()); } }; - incrementalTransformerResults.setIncrementalRelNode(originalNode.accept(converter)); - return incrementalTransformerResults; + 
return originalNode.accept(converter); + } + + /** + * Returns the tempLastRelNode and sets the variable back to null. Should only be called once for each retrieval + * instance since subsequent consecutive calls will yield null. + */ + private RelNode getTempLastRelNode() { + RelNode currentTempLastRelNode = tempLastRelNode; + tempLastRelNode = null; + return currentTempLastRelNode; } - private static String getTableNameFromDescription(RelNode relNode) { + /** + * Returns the corresponding description for a given RelNode by extracting the identifier (ex. the identifier for + * LogicalProject#22 is 22) and prepending the TABLE_NAME_PREFIX. Depending on the delta value, a delta suffix may be + * appended. + * @param relNode RelNode from which the identifier will be retrieved. + * @param delta configure whether to get the delta name + */ + private String getDescriptionFromRelNode(RelNode relNode, boolean delta) { String identifier = relNode.getDescription().split("#")[1]; - return "Table#" + identifier; + String description = TABLE_NAME_PREFIX + identifier; + if (delta) { + return description + DELTA_SUFFIX; + } + return description; } - private static LogicalProject createReplacementProjectNodeForGivenRelNode(String relOptTableName, RelNode relNode, - RexBuilder rexBuilder) { + /** + * Returns a description based on mapping index order that will stay the same across different runs of the same + * query. The description consists of the table prefix, the index, and optionally, the delta suffix. 
+ * @param description output from calling getDescriptionFromRelNode() + * @param delta configure whether to get the delta name + */ + private String getDeterministicDescriptionFromDescription(String description, boolean delta) { + if (delta) { + List deltaKeyOrdering = new ArrayList<>(deltaRelNodes.keySet()); + return TABLE_NAME_PREFIX + deltaKeyOrdering.indexOf(description) + DELTA_SUFFIX; + } else { + List snapshotKeyOrdering = new ArrayList<>(snapshotRelNodes.keySet()); + return TABLE_NAME_PREFIX + snapshotKeyOrdering.indexOf(description); + } + } + + /** + * Accepts a table name and RelNode and creates a TableScan over the RelNode using the class relOptSchema. + * @param relOptTableName table name corresponding to table to scan over + * @param relNode top-level RelNode that will be replaced with the TableScan + */ + private TableScan createTableScanForGivenRelNode(String relOptTableName, RelNode relNode) { RelOptTable table = RelOptTableImpl.create(relOptSchema, relNode.getRowType(), Collections.singletonList(relOptTableName), null); - TableScan scan = LogicalTableScan.create(relNode.getCluster(), table); - return createProjectOverNode(scan, rexBuilder); + return LogicalTableScan.create(relNode.getCluster(), table); } - private static LogicalProject createProjectOverNode(RelNode relNode, RexBuilder rexBuilder) { + /** Creates a LogicalProject whose input is an incremental LogicalJoin node that is constructed from a left and right + * RelNode and LogicalJoin. 
+ * @param join LogicalJoin to create the incremental join from + * @param left left RelNode child of the incremental join + * @param right right RelNode child of the incremental join + * @param rexBuilder RexBuilder for LogicalProject creation + */ + private LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right, RexBuilder rexBuilder) { + LogicalJoin incrementalJoin = + LogicalJoin.create(left, right, join.getCondition(), join.getVariablesSet(), join.getJoinType()); ArrayList projects = new ArrayList<>(); ArrayList names = new ArrayList<>(); - IntStream.range(0, relNode.getRowType().getFieldList().size()).forEach(i -> { - projects.add(rexBuilder.makeInputRef(relNode, i)); - names.add(relNode.getRowType().getFieldNames().get(i)); + IntStream.range(0, incrementalJoin.getRowType().getFieldList().size()).forEach(i -> { + projects.add(rexBuilder.makeInputRef(incrementalJoin, i)); + names.add(incrementalJoin.getRowType().getFieldNames().get(i)); }); - return LogicalProject.create(relNode, projects, names); - } - - private static LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right, - RexBuilder rexBuilder) { - LogicalJoin incrementalJoin = - LogicalJoin.create(left, right, join.getCondition(), join.getVariablesSet(), join.getJoinType()); - return createProjectOverNode(incrementalJoin, rexBuilder); + return LogicalProject.create(incrementalJoin, projects, names); } } diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 0033dc858..2c5262ce5 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -7,6 +7,8 @@ import java.io.File; import java.io.IOException; +import 
java.util.LinkedHashMap; +import java.util.Map; import org.apache.calcite.rel.RelNode; import org.apache.calcite.sql.SqlNode; @@ -41,9 +43,8 @@ public void afterClass() throws IOException { } public String convert(RelNode relNode) { - IncrementalTransformerResults incrementalTransformerResults = - RelNodeIncrementalTransformer.performIncrementalTransformation(relNode); - RelNode incrementalRelNode = incrementalTransformerResults.getIncrementalRelNode(); + RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer(); + RelNode incrementalRelNode = transformer.convertRelIncremental(relNode); CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); SqlNode sqlNode = converter.convert(incrementalRelNode); return sqlNode.toSqlString(converter.INSTANCE).getSql(); @@ -54,6 +55,28 @@ public String getIncrementalModification(String sql) { return convert(originalRelNode); } + public void checkAllSnapshotAndDeltaQueries(String sql, Map snapshotExpected, + Map deltaExpected) { + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); + RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer(); + transformer.convertRelIncremental(originalRelNode); + Map snapshotRelNodes = transformer.getSnapshotRelNodes(); + Map deltaRelNodes = transformer.getDeltaRelNodes(); + for (String key : snapshotRelNodes.keySet()) { + RelNode actualSnapshotRelNode = snapshotRelNodes.get(key); + SqlNode sqlNode = converter.convert(actualSnapshotRelNode); + String actualSql = sqlNode.toSqlString(converter.INSTANCE).getSql(); + assertEquals(actualSql, snapshotExpected.get(key)); + } + for (String key : deltaRelNodes.keySet()) { + RelNode actualDeltaRelNode = deltaRelNodes.get(key); + SqlNode sqlNode = converter.convert(actualDeltaRelNode); + String actualSql = sqlNode.toSqlString(converter.INSTANCE).getSql(); + assertEquals(actualSql, deltaExpected.get(key)); + } + } + 
@Test public void testSimpleSelectAll() { String sql = "SELECT * FROM test.foo"; @@ -115,13 +138,27 @@ public void testSelectSpecificJoin() { public void testNestedJoin() { String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; - String expected = "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#0\n" - + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#0.a2 = gamma_delta.g2\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM Table#0_delta AS Table#0_delta\n" - + "INNER JOIN test.gamma AS gamma ON Table#0_delta.a2 = gamma.g2) AS t\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM Table#0_delta AS Table#0_delta0\n" - + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0"; - assertEquals(getIncrementalModification(sql), expected); + Map snapshotExpected = new LinkedHashMap<>(); + snapshotExpected.put("Table#0", + "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1"); + snapshotExpected.put("Table#1", + "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2"); + Map deltaExpected = new LinkedHashMap<>(); + deltaExpected.put("Table#0_delta", + "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha AS alpha0\n" + + "INNER JOIN test.beta_delta AS beta_delta ON alpha0.a1 = beta_delta.b1\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.alpha_delta AS alpha_delta\n" + + "INNER JOIN test.beta AS beta0 ON alpha_delta.a1 = beta0.b1) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.alpha_delta AS alpha_delta0\n" + + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0"); + deltaExpected.put("Table#1_delta", + "SELECT t3.a2, t3.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#00\n" + 
+ "INNER JOIN test.gamma_delta AS gamma_delta ON Table#00.a2 = gamma_delta.g2\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma AS gamma0 ON Table#0_delta.a2 = gamma0.g2) AS t2\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t3"); + checkAllSnapshotAndDeltaQueries(sql, snapshotExpected, deltaExpected); } @Test @@ -129,12 +166,35 @@ public void testThreeNestedJoins() { String nestedJoin1 = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; String nestedJoin2 = "SELECT a2, g1 FROM (" + nestedJoin1 + ") AS nj1 JOIN test.gamma ON nj1.a2 = test.gamma.g2"; String sql = "SELECT g1, e2 FROM (" + nestedJoin2 + ") AS nj2 JOIN test.epsilon ON nj2.g1 = test.epsilon.e1"; - String expected = "SELECT t0.g1, t0.e2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#2 AS Table#2\n" - + "INNER JOIN test.epsilon_delta AS epsilon_delta ON Table#2.g1 = epsilon_delta.e1\n" + "UNION ALL\n" - + "SELECT *\n" + "FROM Table#2_delta AS Table#2_delta\n" - + "INNER JOIN test.epsilon AS epsilon ON Table#2_delta.g1 = epsilon.e1) AS t\n" + "UNION ALL\n" + "SELECT *\n" - + "FROM Table#2_delta AS Table#2_delta0\n" - + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#2_delta0.g1 = epsilon_delta0.e1) AS t0"; - assertEquals(getIncrementalModification(sql), expected); + Map snapshotExpected = new LinkedHashMap<>(); + snapshotExpected.put("Table#0", + "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1"); + snapshotExpected.put("Table#1", + "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2"); + snapshotExpected.put("Table#2", + "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1"); + Map deltaExpected = new LinkedHashMap<>(); + 
deltaExpected.put("Table#0_delta", + "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha AS alpha0\n" + + "INNER JOIN test.beta_delta AS beta_delta ON alpha0.a1 = beta_delta.b1\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.alpha_delta AS alpha_delta\n" + + "INNER JOIN test.beta AS beta0 ON alpha_delta.a1 = beta0.b1) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.alpha_delta AS alpha_delta0\n" + + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0"); + deltaExpected.put("Table#1_delta", + "SELECT t3.a2, t3.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#00\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#00.a2 = gamma_delta.g2\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma AS gamma0 ON Table#0_delta.a2 = gamma0.g2) AS t2\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t3"); + deltaExpected.put("Table#2_delta", + "SELECT t6.g1, t6.e2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#1 AS Table#10\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta ON Table#10.g1 = epsilon_delta.e1\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta\n" + + "INNER JOIN test.epsilon AS epsilon0 ON Table#1_delta.g1 = epsilon0.e1) AS t5\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta0\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#1_delta0.g1 = epsilon_delta0.e1) AS t6"); + checkAllSnapshotAndDeltaQueries(sql, snapshotExpected, deltaExpected); } } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java index 61cb3eb44..055066ff6 100644 --- 
a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java @@ -8,7 +8,6 @@ import org.apache.calcite.rel.RelNode; import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; -import com.linkedin.coral.incremental.IncrementalTransformerResults; import com.linkedin.coral.incremental.RelNodeIncrementalTransformer; import com.linkedin.coral.spark.CoralSpark; @@ -19,9 +18,8 @@ public class IncrementalUtils { public static String getSparkIncrementalQueryFromUserSql(String query) { RelNode originalNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); - IncrementalTransformerResults incrementalTransformerResults = - RelNodeIncrementalTransformer.performIncrementalTransformation(originalNode); - RelNode incrementalRelNode = incrementalTransformerResults.getIncrementalRelNode(); + RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer(); + RelNode incrementalRelNode = transformer.convertRelIncremental(originalNode); CoralSpark coralSpark = CoralSpark.create(incrementalRelNode); return coralSpark.getSparkSql(); } From 00bc8cb17d77800a2c18074bcad479a9c9e9d6ce Mon Sep 17 00:00:00 2001 From: Alice Yeh Date: Fri, 28 Apr 2023 11:08:41 -0700 Subject: [PATCH 06/21] Refactor subtree replacement method name --- .../incremental/RelNodeIncrementalTransformer.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index dce497d10..9681166ae 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -110,16 +110,16 @@ public RelNode visit(LogicalJoin 
join) { String leftIncrementalDescription = getDescriptionFromRelNode(left, true); if (snapshotRelNodes.containsKey(leftDescription)) { left = - createTableScanForGivenRelNode(getDeterministicDescriptionFromDescription(leftDescription, false), left); - incrementalLeft = createTableScanForGivenRelNode( + susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(leftDescription, false), left); + incrementalLeft = susbstituteWithMaterializedView( getDeterministicDescriptionFromDescription(leftIncrementalDescription, true), incrementalLeft); } String rightDescription = getDescriptionFromRelNode(right, false); String rightIncrementalDescription = getDescriptionFromRelNode(right, true); if (snapshotRelNodes.containsKey(rightDescription)) { - right = createTableScanForGivenRelNode(getDeterministicDescriptionFromDescription(rightDescription, false), + right = susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(rightDescription, false), right); - incrementalRight = createTableScanForGivenRelNode( + incrementalRight = susbstituteWithMaterializedView( getDeterministicDescriptionFromDescription(rightIncrementalDescription, true), incrementalRight); } @@ -222,7 +222,7 @@ private String getDeterministicDescriptionFromDescription(String description, bo * @param relOptTableName table name corresponding to table to scan over * @param relNode top-level RelNode that will be replaced with the TableScan */ - private TableScan createTableScanForGivenRelNode(String relOptTableName, RelNode relNode) { + private TableScan susbstituteWithMaterializedView(String relOptTableName, RelNode relNode) { RelOptTable table = RelOptTableImpl.create(relOptSchema, relNode.getRowType(), Collections.singletonList(relOptTableName), null); return LogicalTableScan.create(relNode.getCluster(), table); From 31e04d785ab1fb5fe5e33ffd545d905cbc98be55 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Fri, 26 Jul 2024 16:16:54 -0700 Subject: [PATCH 07/21] feat: add prev format 
rewrite --- .../RelNodeGenerationTransformer.java | 307 ++++++++++++++++++ .../RelNodeIncrementalTransformer.java | 2 +- .../RelToIncrementalSqlConverterTest.java | 2 +- .../linkedin/coral/incremental/TestUtils.java | 2 +- .../coralservice/utils/IncrementalUtils.java | 2 +- 5 files changed, 311 insertions(+), 4 deletions(-) create mode 100644 coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java new file mode 100644 index 000000000..baa3a2e05 --- /dev/null +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -0,0 +1,307 @@ +/** + * Copyright 2024 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.incremental; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.prepare.RelOptTableImpl; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.rel.RelShuttleImpl; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalTableScan; +import org.apache.calcite.rel.logical.LogicalUnion; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; + + +public class RelNodeGenerationTransformer { + private final String TABLE_NAME_PREFIX = "Table#"; + private final String DELTA_SUFFIX = "_delta"; + + private RelOptSchema relOptSchema; + private Map snapshotRelNodes; + private Map deltaRelNodes; + private RelNode tempLastRelNode; + + public RelNodeGenerationTransformer() { + relOptSchema = null; + snapshotRelNodes = new LinkedHashMap<>(); + deltaRelNodes = new LinkedHashMap<>(); + tempLastRelNode = null; + } + + /** + * Returns snapshotRelNodes with deterministic keys. + */ + public Map getSnapshotRelNodes() { + Map deterministicSnapshotRelNodes = new LinkedHashMap<>(); + for (String description : snapshotRelNodes.keySet()) { + deterministicSnapshotRelNodes.put(getDeterministicDescriptionFromDescription(description, false), + snapshotRelNodes.get(description)); + } + return deterministicSnapshotRelNodes; + } + + /** + * Returns deltaRelNodes with deterministic keys. 
+ */ + public Map getDeltaRelNodes() { + Map deterministicDeltaRelNodes = new LinkedHashMap<>(); + for (String description : deltaRelNodes.keySet()) { + deterministicDeltaRelNodes.put(getDeterministicDescriptionFromDescription(description, true), + deltaRelNodes.get(description)); + } + return deterministicDeltaRelNodes; + } + + private RelNode convertRelPrev(RelNode originalNode) { + RelShuttle converter = new RelShuttleImpl() { + @Override + public RelNode visit(TableScan scan) { + RelOptTable originalTable = scan.getTable(); + List incrementalNames = new ArrayList<>(originalTable.getQualifiedName()); + String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + "_prev"; + incrementalNames.add(deltaTableName); + RelOptTable incrementalTable = + RelOptTableImpl.create(originalTable.getRelOptSchema(), originalTable.getRowType(), incrementalNames, null); + return LogicalTableScan.create(scan.getCluster(), incrementalTable); + } + + @Override + public RelNode visit(LogicalJoin join) { + RelNode left = join.getLeft(); + RelNode right = join.getRight(); + RelNode prevLeft = convertRelPrev(left); + RelNode prevRight = convertRelPrev(right); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + + LogicalProject p3 = createProjectOverJoin(join, prevLeft, prevRight, rexBuilder); + + return p3; + } + + @Override + public RelNode visit(LogicalFilter filter) { + RelNode transformedChild = convertRelPrev(filter.getInput()); + + return LogicalFilter.create(transformedChild, filter.getCondition()); + } + + @Override + public RelNode visit(LogicalProject project) { + RelNode transformedChild = convertRelPrev(project.getInput()); + return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + } + + @Override + public RelNode visit(LogicalUnion union) { + List children = union.getInputs(); + List transformedChildren = + children.stream().map(child -> convertRelPrev(child)).collect(Collectors.toList()); + return 
LogicalUnion.create(transformedChildren, union.all); + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + RelNode transformedChild = convertRelPrev(aggregate.getInput()); + return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), + aggregate.getAggCallList()); + } + }; + return originalNode.accept(converter); + } + + /** + * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes. + * @param originalNode input RelNode to generate an incremental version for. + */ + public RelNode convertRelIncremental(RelNode originalNode) { + RelShuttle converter = new RelShuttleImpl() { + @Override + public RelNode visit(TableScan scan) { + RelOptTable originalTable = scan.getTable(); + + // Set RelNodeIncrementalTransformer class relOptSchema if not already set + if (relOptSchema == null) { + relOptSchema = originalTable.getRelOptSchema(); + } + + // Create delta scan + List incrementalNames = new ArrayList<>(originalTable.getQualifiedName()); + String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + DELTA_SUFFIX; + incrementalNames.add(deltaTableName); + RelOptTable incrementalTable = + RelOptTableImpl.create(originalTable.getRelOptSchema(), originalTable.getRowType(), incrementalNames, null); + return LogicalTableScan.create(scan.getCluster(), incrementalTable); + } + + @Override + public RelNode visit(LogicalJoin join) { + RelNode left = join.getLeft(); + RelNode right = join.getRight(); + RelNode incrementalLeft = convertRelIncremental(left); + RelNode incrementalRight = convertRelIncremental(right); + + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + + // Check if we can replace the left and right nodes with a scan of a materialized table + String leftDescription = getDescriptionFromRelNode(left, false); + String leftIncrementalDescription = getDescriptionFromRelNode(left, true); + if (snapshotRelNodes.containsKey(leftDescription)) { + 
left = + susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(leftDescription, false), left); + incrementalLeft = susbstituteWithMaterializedView( + getDeterministicDescriptionFromDescription(leftIncrementalDescription, true), incrementalLeft); + } + String rightDescription = getDescriptionFromRelNode(right, false); + String rightIncrementalDescription = getDescriptionFromRelNode(right, true); + if (snapshotRelNodes.containsKey(rightDescription)) { + right = susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(rightDescription, false), + right); + incrementalRight = susbstituteWithMaterializedView( + getDeterministicDescriptionFromDescription(rightIncrementalDescription, true), incrementalRight); + } + RelNode prevLeft = convertRelPrev(left); + RelNode prevRight = convertRelPrev(right); + + // We need to do this in the join to get potentially updated left and right nodes + tempLastRelNode = createProjectOverJoin(join, left, right, rexBuilder); + + LogicalProject p1 = createProjectOverJoin(join, prevLeft, incrementalRight, rexBuilder); + LogicalProject p2 = createProjectOverJoin(join, incrementalLeft, prevRight, rexBuilder); + LogicalProject p3 = createProjectOverJoin(join, incrementalLeft, incrementalRight, rexBuilder); + + LogicalUnion unionAllJoins = + LogicalUnion.create(Arrays.asList(LogicalUnion.create(Arrays.asList(p1, p2), true), p3), true); + + return unionAllJoins; + } + + @Override + public RelNode visit(LogicalFilter filter) { + RelNode transformedChild = convertRelIncremental(filter.getInput()); + return LogicalFilter.create(transformedChild, filter.getCondition()); + } + + @Override + public RelNode visit(LogicalProject project) { + RelNode transformedChild = convertRelIncremental(project.getInput()); + RelNode materializedProject = getTempLastRelNode(); + if (materializedProject != null) { + snapshotRelNodes.put(getDescriptionFromRelNode(project, false), materializedProject); + } else { + 
snapshotRelNodes.put(getDescriptionFromRelNode(project, false), project); + } + LogicalProject transformedProject = + LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + deltaRelNodes.put(getDescriptionFromRelNode(project, true), transformedProject); + return transformedProject; + } + + @Override + public RelNode visit(LogicalUnion union) { + List children = union.getInputs(); + List transformedChildren = + children.stream().map(child -> convertRelIncremental(child)).collect(Collectors.toList()); + return LogicalUnion.create(transformedChildren, union.all); + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + RelNode transformedChild = convertRelIncremental(aggregate.getInput()); + return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), + aggregate.getAggCallList()); + } + }; + return originalNode.accept(converter); + } + + /** + * Returns the tempLastRelNode and sets the variable back to null. Should only be called once for each retrieval + * instance since subsequent consecutive calls will yield null. + */ + private RelNode getTempLastRelNode() { + RelNode currentTempLastRelNode = tempLastRelNode; + tempLastRelNode = null; + return currentTempLastRelNode; + } + + /** + * Returns the corresponding description for a given RelNode by extracting the identifier (ex. the identifier for + * LogicalProject#22 is 22) and prepending the TABLE_NAME_PREFIX. Depending on the delta value, a delta suffix may be + * appended. + * @param relNode RelNode from which the identifier will be retrieved. 
+ * @param delta configure whether to get the delta name + */ + private String getDescriptionFromRelNode(RelNode relNode, boolean delta) { + String identifier = relNode.getDescription().split("#")[1]; + String description = TABLE_NAME_PREFIX + identifier; + if (delta) { + return description + DELTA_SUFFIX; + } + return description; + } + + /** + * Returns a description based on mapping index order that will stay the same across different runs of the same + * query. The description consists of the table prefix, the index, and optionally, the delta suffix. + * @param description output from calling getDescriptionFromRelNode() + * @param delta configure whether to get the delta name + */ + private String getDeterministicDescriptionFromDescription(String description, boolean delta) { + if (delta) { + List deltaKeyOrdering = new ArrayList<>(deltaRelNodes.keySet()); + return TABLE_NAME_PREFIX + deltaKeyOrdering.indexOf(description) + DELTA_SUFFIX; + } else { + List snapshotKeyOrdering = new ArrayList<>(snapshotRelNodes.keySet()); + return TABLE_NAME_PREFIX + snapshotKeyOrdering.indexOf(description); + } + } + + /** + * Accepts a table name and RelNode and creates a TableScan over the RelNode using the class relOptSchema. + * @param relOptTableName table name corresponding to table to scan over + * @param relNode top-level RelNode that will be replaced with the TableScan + */ + private TableScan susbstituteWithMaterializedView(String relOptTableName, RelNode relNode) { + RelOptTable table = + RelOptTableImpl.create(relOptSchema, relNode.getRowType(), Collections.singletonList(relOptTableName), null); + return LogicalTableScan.create(relNode.getCluster(), table); + } + + /** Creates a LogicalProject whose input is an incremental LogicalJoin node that is constructed from a left and right + * RelNode and LogicalJoin. 
+ * @param join LogicalJoin to create the incremental join from + * @param left left RelNode child of the incremental join + * @param right right RelNode child of the incremental join + * @param rexBuilder RexBuilder for LogicalProject creation + */ + private LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right, RexBuilder rexBuilder) { + LogicalJoin incrementalJoin = + LogicalJoin.create(left, right, join.getCondition(), join.getVariablesSet(), join.getJoinType()); + ArrayList projects = new ArrayList<>(); + ArrayList names = new ArrayList<>(); + IntStream.range(0, incrementalJoin.getRowType().getFieldList().size()).forEach(i -> { + projects.add(rexBuilder.makeInputRef(incrementalJoin, i)); + names.add(incrementalJoin.getRowType().getFieldNames().get(i)); + }); + return LogicalProject.create(incrementalJoin, projects, names); + } +} diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 9681166ae..4a31cb539 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. 
*/ diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 2c5262ce5..af1856f57 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java index 7384be9a8..4c0c12faa 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/TestUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java index 055066ff6..68c99aacc 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. 
* See LICENSE in the project root for license information. */ From 0a884322ac860a2a9bce02ed8ee9d012e9390cd6 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Fri, 26 Jul 2024 16:26:33 -0700 Subject: [PATCH 08/21] fix merge conflict --- .../linkedin/coral/coralservice/utils/IncrementalUtils.java | 6 +----- .../coral/coralservice/utils/VisualizationUtils.java | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java index 8601708a8..6f931309c 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java @@ -30,11 +30,7 @@ public static String getIncrementalQuery(String query, String sourceLanguage, St break; } - - public static String getSparkIncrementalQueryFromUserSql(String query) { - RelNode originalNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); - - RelNode incrementalRelNode = new RelNodeIncrementalTransformer.convertRelIncremental(originalNode); + RelNode incrementalRelNode = new RelNodeIncrementalTransformer().convertRelIncremental(originalNode); switch (targetLanguage.toLowerCase()) { case "trino": diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java index a3b91bd94..4655801b4 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2022-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2022-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. 
* See LICENSE in the project root for license information. */ @@ -46,7 +46,7 @@ public ArrayList generateIRVisualizations(String query, String sourceLangu // Pass in pre-rewrite rel node switch (rewriteType) { case INCREMENTAL: - postRewriteRelNode = RelNodeIncrementalTransformer.convertRelIncremental(relNode); + postRewriteRelNode = new RelNodeIncrementalTransformer().convertRelIncremental(relNode); break; case DATAMASKING: default: From 019b1b6771bea470cfab749379d8c75742c504db Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Fri, 26 Jul 2024 16:28:54 -0700 Subject: [PATCH 09/21] feat: add all incremental rewrite --- .../RelNodeIncrementalTransformer.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 4a31cb539..f815677f1 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -5,6 +5,7 @@ */ package com.linkedin.coral.incremental; +import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -29,6 +30,7 @@ import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlNode; public class RelNodeIncrementalTransformer { @@ -47,6 +49,45 @@ public RelNodeIncrementalTransformer() { deltaRelNodes = new LinkedHashMap<>(); tempLastRelNode = null; } + public List generateIncrementalRelNodes(RelNode relNode) { + RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer(); + CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); + Map snapshotRelNodes = 
transformer.getSnapshotRelNodes(); + Map deltaRelNodes = transformer.getDeltaRelNodes(); + List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes); + combinedLists.add(Arrays.asList(relNode)); + for(List plan : combinedLists) { + for(RelNode node : plan) { + SqlNode sqlNode = converter.convert(node); + System.out.println(sqlNode.toSqlString(converter.INSTANCE).getSql()); + System.out.println(" "); + } + System.out.println("XXXXXXXXXX"); + } + return combinedLists.get(0); + } + + public List> generateCombinedLists(Map tMap, Map mMap) { + List> resultList = new ArrayList<>(); + int n = tMap.size(); // Assuming tMap and mMap have the same size + + for (int i = 0; i < n; i++) { + List tempList = new ArrayList<>(); + + // Add elements from tMap and mMap following the rule + for (int j = 0; j < n; j++) { + if (j <= i) { + tempList.add(tMap.get("Table#" + j + "_delta")); + } else { + tempList.add(mMap.get("Table#" + j)); + } + } + + resultList.add(tempList); + } + + return resultList; + } /** * Returns snapshotRelNodes with deterministic keys. 
From 9818c6fb527ebbca1356525391898c72c2783c8c Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Mon, 29 Jul 2024 11:13:45 -0700 Subject: [PATCH 10/21] feat: uniform RelNode format --- .../RelNodeGenerationTransformer.java | 157 ++++++++++++++++++ .../RelNodeIncrementalTransformer.java | 39 ----- .../RelToIncrementalSqlConverterTest.java | 30 ++++ 3 files changed, 187 insertions(+), 39 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index baa3a2e05..3acb88ab5 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -5,12 +5,15 @@ */ package com.linkedin.coral.incremental; +import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -29,6 +32,7 @@ import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlNode; public class RelNodeGenerationTransformer { @@ -39,12 +43,56 @@ public class RelNodeGenerationTransformer { private Map snapshotRelNodes; private Map deltaRelNodes; private RelNode tempLastRelNode; + private Set needsProj; public RelNodeGenerationTransformer() { relOptSchema = null; snapshotRelNodes = new LinkedHashMap<>(); deltaRelNodes = new LinkedHashMap<>(); tempLastRelNode = null; + needsProj = new HashSet<>(); + } + + public List> generateIncrementalRelNodes(RelNode relNode) { + getLevelRelation(relNode); + relNode = uniformFormat(relNode); 
+ convertRelIncremental(relNode); + CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); + Map snapshotRelNodes = getSnapshotRelNodes(); + Map deltaRelNodes = getDeltaRelNodes(); + List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes); + combinedLists.add(Arrays.asList(relNode)); + for(List plan : combinedLists) { + for(RelNode node : plan) { + SqlNode sqlNode = converter.convert(node); + System.out.println(sqlNode.toSqlString(converter.INSTANCE).getSql()); + System.out.println(" "); + } + System.out.println("XXXXXXXXXX"); + } + return combinedLists; + } + + private List> generateCombinedLists(Map tMap, Map mMap) { + List> resultList = new ArrayList<>(); + int n = tMap.size(); // Assuming tMap and mMap have the same size + + for (int i = 0; i < n; i++) { + List tempList = new ArrayList<>(); + + // Add elements from tMap and mMap following the rule + for (int j = 0; j < n; j++) { + if (j <= i) { + tempList.add(tMap.get("Table#" + j + "_delta")); + } else { + tempList.add(mMap.get("Table#" + j)); + } + } + + resultList.add(tempList); + } + + return resultList; } /** @@ -71,6 +119,67 @@ public Map getDeltaRelNodes() { return deterministicDeltaRelNodes; } + private void getLevelRelation(RelNode relNode) { + RelShuttle converter = new RelShuttleImpl() { + + @Override + public RelNode visit(LogicalJoin join) { + RelNode left = join.getLeft(); + RelNode right = join.getRight(); + if(left instanceof LogicalJoin) { + needsProj.add(left); + } + if(right instanceof LogicalJoin) { + needsProj.add(right); + } + + getLevelRelation(left); + getLevelRelation(right); + + return join; + } + + @Override + public RelNode visit(LogicalFilter filter) { + if(filter.getInput() instanceof LogicalJoin) { + needsProj.add(filter.getInput()); + } + getLevelRelation(filter.getInput()); + + return filter; + } + + @Override + public RelNode visit(LogicalProject project) { + getLevelRelation(project.getInput()); + return project; + } + + @Override + 
public RelNode visit(LogicalUnion union) { + List children = union.getInputs(); + for(RelNode child : children) { + if(child instanceof LogicalJoin) { + needsProj.add(child); + } + getLevelRelation(child); + } + + return union; + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + if(aggregate.getInput() instanceof LogicalJoin) { + needsProj.add(aggregate.getInput()); + } + getLevelRelation(aggregate.getInput()); + + return aggregate; + } + }; + relNode.accept(converter); + } private RelNode convertRelPrev(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @Override @@ -128,6 +237,54 @@ public RelNode visit(LogicalAggregate aggregate) { return originalNode.accept(converter); } + private RelNode uniformFormat(RelNode originalNode){ + RelShuttle converter = new RelShuttleImpl() { + + @Override + public RelNode visit(LogicalJoin join) { + RelNode left = join.getLeft(); + RelNode right = join.getRight(); + RelNode uniLeft = uniformFormat(left); + RelNode uniRight = uniformFormat(right); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + if(needsProj.contains(join)) { + LogicalProject p1 = createProjectOverJoin(join, uniLeft, uniRight, rexBuilder); + return p1; + } + return + LogicalJoin.create(uniLeft, uniRight, join.getCondition(), join.getVariablesSet(), join.getJoinType()); + } + + @Override + public RelNode visit(LogicalFilter filter) { + RelNode transformedChild = uniformFormat(filter.getInput()); + + return LogicalFilter.create(transformedChild, filter.getCondition()); + } + + @Override + public RelNode visit(LogicalProject project) { + RelNode transformedChild = uniformFormat(project.getInput()); + return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + } + + @Override + public RelNode visit(LogicalUnion union) { + List children = union.getInputs(); + List transformedChildren = + children.stream().map(child -> uniformFormat(child)).collect(Collectors.toList()); + return 
LogicalUnion.create(transformedChildren, union.all); + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + RelNode transformedChild = uniformFormat(aggregate.getInput()); + return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), + aggregate.getAggCallList()); + } + }; + return originalNode.accept(converter); + } /** * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes. * @param originalNode input RelNode to generate an incremental version for. diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index f815677f1..9527eaaf9 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -49,45 +49,6 @@ public RelNodeIncrementalTransformer() { deltaRelNodes = new LinkedHashMap<>(); tempLastRelNode = null; } - public List generateIncrementalRelNodes(RelNode relNode) { - RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer(); - CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); - Map snapshotRelNodes = transformer.getSnapshotRelNodes(); - Map deltaRelNodes = transformer.getDeltaRelNodes(); - List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes); - combinedLists.add(Arrays.asList(relNode)); - for(List plan : combinedLists) { - for(RelNode node : plan) { - SqlNode sqlNode = converter.convert(node); - System.out.println(sqlNode.toSqlString(converter.INSTANCE).getSql()); - System.out.println(" "); - } - System.out.println("XXXXXXXXXX"); - } - return combinedLists.get(0); - } - - public List> generateCombinedLists(Map tMap, Map mMap) { - List> resultList = new ArrayList<>(); - int n = 
tMap.size(); // Assuming tMap and mMap have the same size - - for (int i = 0; i < n; i++) { - List tempList = new ArrayList<>(); - - // Add elements from tMap and mMap following the rule - for (int j = 0; j < n; j++) { - if (j <= i) { - tempList.add(tMap.get("Table#" + j + "_delta")); - } else { - tempList.add(mMap.get("Table#" + j)); - } - } - - resultList.add(tempList); - } - - return resultList; - } /** * Returns snapshotRelNodes with deterministic keys. diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index af1856f57..1395cc3c6 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -8,6 +8,7 @@ import java.io.File; import java.io.IOException; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.apache.calcite.rel.RelNode; @@ -134,6 +135,35 @@ public void testSelectSpecificJoin() { assertEquals(getIncrementalModification(sql), expected); } + @Test + public void testSimpleJ() { + String sql = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer(); + List> nodes = transformer.generateIncrementalRelNodes(originalRelNode); + assertEquals(nodes.size(), 2); + } + + @Test + public void mytest1() { + String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + RelNodeGenerationTransformer transformer = new 
RelNodeGenerationTransformer(); + List> nodes = transformer.generateIncrementalRelNodes(originalRelNode); + assertEquals(nodes.size(), 3); + } + + @Test + public void mytest2() { + String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String sql2 = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; + String sql = "SELECT a1, a2, g1 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1 JOIN test.gamma ON test.alpha.a2 = test.gamma.g2"; + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer(); + RelNode n2 = hiveToRelConverter.convertSql(sql2); + transformer.generateIncrementalRelNodes(originalRelNode); + } @Test public void testNestedJoin() { String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; From f203ad87f52fe51a2bff39553455c462d1eb15ff Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Mon, 29 Jul 2024 14:10:26 -0700 Subject: [PATCH 11/21] feat: consolidate unit tests --- .../RelNodeGenerationTransformer.java | 114 +++++------ .../RelNodeIncrementalTransformer.java | 2 - .../incremental/RelNodeGenerationTest.java | 189 ++++++++++++++++++ .../RelToIncrementalSqlConverterTest.java | 4 +- 4 files changed, 250 insertions(+), 59 deletions(-) create mode 100644 coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 3acb88ab5..3c0263e5f 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -5,7 +5,6 @@ */ package com.linkedin.coral.incremental; 
-import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -34,6 +33,8 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlNode; +import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; + public class RelNodeGenerationTransformer { private final String TABLE_NAME_PREFIX = "Table#"; @@ -62,8 +63,8 @@ public List> generateIncrementalRelNodes(RelNode relNode) { Map deltaRelNodes = getDeltaRelNodes(); List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes); combinedLists.add(Arrays.asList(relNode)); - for(List plan : combinedLists) { - for(RelNode node : plan) { + for (List plan : combinedLists) { + for (RelNode node : plan) { SqlNode sqlNode = converter.convert(node); System.out.println(sqlNode.toSqlString(converter.INSTANCE).getSql()); System.out.println(" "); @@ -73,19 +74,19 @@ public List> generateIncrementalRelNodes(RelNode relNode) { return combinedLists; } - private List> generateCombinedLists(Map tMap, Map mMap) { + private List> generateCombinedLists(Map deltaRelNodes, + Map snapshotRelNodes) { List> resultList = new ArrayList<>(); - int n = tMap.size(); // Assuming tMap and mMap have the same size + assert (deltaRelNodes.size() == snapshotRelNodes.size()); + int n = deltaRelNodes.size(); for (int i = 0; i < n; i++) { List tempList = new ArrayList<>(); - - // Add elements from tMap and mMap following the rule for (int j = 0; j < n; j++) { if (j <= i) { - tempList.add(tMap.get("Table#" + j + "_delta")); + tempList.add(deltaRelNodes.get("Table#" + j + "_delta")); } else { - tempList.add(mMap.get("Table#" + j)); + tempList.add(snapshotRelNodes.get("Table#" + j)); } } @@ -126,10 +127,10 @@ private void getLevelRelation(RelNode relNode) { public RelNode visit(LogicalJoin join) { RelNode left = join.getLeft(); RelNode right = join.getRight(); - if(left instanceof LogicalJoin) { + if (left instanceof LogicalJoin) { 
needsProj.add(left); } - if(right instanceof LogicalJoin) { + if (right instanceof LogicalJoin) { needsProj.add(right); } @@ -141,7 +142,7 @@ public RelNode visit(LogicalJoin join) { @Override public RelNode visit(LogicalFilter filter) { - if(filter.getInput() instanceof LogicalJoin) { + if (filter.getInput() instanceof LogicalJoin) { needsProj.add(filter.getInput()); } getLevelRelation(filter.getInput()); @@ -158,8 +159,8 @@ public RelNode visit(LogicalProject project) { @Override public RelNode visit(LogicalUnion union) { List children = union.getInputs(); - for(RelNode child : children) { - if(child instanceof LogicalJoin) { + for (RelNode child : children) { + if (child instanceof LogicalJoin) { needsProj.add(child); } getLevelRelation(child); @@ -170,7 +171,7 @@ public RelNode visit(LogicalUnion union) { @Override public RelNode visit(LogicalAggregate aggregate) { - if(aggregate.getInput() instanceof LogicalJoin) { + if (aggregate.getInput() instanceof LogicalJoin) { needsProj.add(aggregate.getInput()); } getLevelRelation(aggregate.getInput()); @@ -180,6 +181,7 @@ public RelNode visit(LogicalAggregate aggregate) { }; relNode.accept(converter); } + private RelNode convertRelPrev(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @Override @@ -237,54 +239,54 @@ public RelNode visit(LogicalAggregate aggregate) { return originalNode.accept(converter); } - private RelNode uniformFormat(RelNode originalNode){ - RelShuttle converter = new RelShuttleImpl() { - - @Override - public RelNode visit(LogicalJoin join) { - RelNode left = join.getLeft(); - RelNode right = join.getRight(); - RelNode uniLeft = uniformFormat(left); - RelNode uniRight = uniformFormat(right); - RexBuilder rexBuilder = join.getCluster().getRexBuilder(); - if(needsProj.contains(join)) { - LogicalProject p1 = createProjectOverJoin(join, uniLeft, uniRight, rexBuilder); - return p1; - } - return - LogicalJoin.create(uniLeft, uniRight, join.getCondition(), join.getVariablesSet(), 
join.getJoinType()); + private RelNode uniformFormat(RelNode originalNode) { + RelShuttle converter = new RelShuttleImpl() { + + @Override + public RelNode visit(LogicalJoin join) { + RelNode left = join.getLeft(); + RelNode right = join.getRight(); + RelNode uniLeft = uniformFormat(left); + RelNode uniRight = uniformFormat(right); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + if (needsProj.contains(join)) { + LogicalProject p1 = createProjectOverJoin(join, uniLeft, uniRight, rexBuilder); + return p1; } + return LogicalJoin.create(uniLeft, uniRight, join.getCondition(), join.getVariablesSet(), join.getJoinType()); + } - @Override - public RelNode visit(LogicalFilter filter) { - RelNode transformedChild = uniformFormat(filter.getInput()); + @Override + public RelNode visit(LogicalFilter filter) { + RelNode transformedChild = uniformFormat(filter.getInput()); - return LogicalFilter.create(transformedChild, filter.getCondition()); - } + return LogicalFilter.create(transformedChild, filter.getCondition()); + } - @Override - public RelNode visit(LogicalProject project) { - RelNode transformedChild = uniformFormat(project.getInput()); - return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); - } + @Override + public RelNode visit(LogicalProject project) { + RelNode transformedChild = uniformFormat(project.getInput()); + return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType()); + } - @Override - public RelNode visit(LogicalUnion union) { - List children = union.getInputs(); - List transformedChildren = - children.stream().map(child -> uniformFormat(child)).collect(Collectors.toList()); - return LogicalUnion.create(transformedChildren, union.all); - } + @Override + public RelNode visit(LogicalUnion union) { + List children = union.getInputs(); + List transformedChildren = + children.stream().map(child -> uniformFormat(child)).collect(Collectors.toList()); + return 
LogicalUnion.create(transformedChildren, union.all); + } - @Override - public RelNode visit(LogicalAggregate aggregate) { - RelNode transformedChild = uniformFormat(aggregate.getInput()); - return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), - aggregate.getAggCallList()); - } - }; - return originalNode.accept(converter); + @Override + public RelNode visit(LogicalAggregate aggregate) { + RelNode transformedChild = uniformFormat(aggregate.getInput()); + return LogicalAggregate.create(transformedChild, aggregate.getGroupSet(), aggregate.getGroupSets(), + aggregate.getAggCallList()); + } + }; + return originalNode.accept(converter); } + /** * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes. * @param originalNode input RelNode to generate an incremental version for. diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java index 9527eaaf9..4a31cb539 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java @@ -5,7 +5,6 @@ */ package com.linkedin.coral.incremental; -import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -30,7 +29,6 @@ import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlNode; public class RelNodeIncrementalTransformer { diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java new file mode 100644 index 
000000000..84fe6bb03 --- /dev/null +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java @@ -0,0 +1,189 @@ +/** + * Copyright 2024 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.incremental; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.SqlNode; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; + +import static com.linkedin.coral.incremental.TestUtils.*; +import static org.testng.Assert.*; + + +public class RelNodeGenerationTest { + private HiveConf conf; + + @BeforeClass + public void beforeClass() throws HiveException, MetaException, IOException { + conf = TestUtils.loadResourceHiveConf(); + TestUtils.initializeViews(conf); + } + + @AfterTest + public void afterClass() throws IOException { + FileUtils.deleteDirectory(new File(conf.get(CORAL_INCREMENTAL_TEST_DIR))); + } + + public String convert(RelNode relNode) { + CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); + SqlNode sqlNode = converter.convert(relNode); + return sqlNode.toSqlString(converter.INSTANCE).getSql(); + } + + public void checkAllPlans(String sql, List> expected) { + List> plans = getAllPlans(sql); + assertEquals(plans.size(), expected.size()); + for (int i = 0; i < plans.size(); i++) { + List plan = plans.get(i); + List expectedPlan = expected.get(i); + assertEquals(plan.size(), expectedPlan.size()); + for (int j = 0; j < 
plan.size(); j++) { + String actual = convert(plan.get(j)); + assertEquals(actual, expectedPlan.get(j)); + } + } + } + + public List> getAllPlans(String sql) { + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer(); + return transformer.generateIncrementalRelNodes(originalRelNode); + } + + @Test + public void testSimpleSelectAll() { + String sql = "SELECT * FROM test.foo"; + List incremental = Arrays.asList("SELECT *\n" + "FROM test.foo_delta AS foo_delta"); + List batch = Arrays.asList("SELECT *\n" + "FROM test.foo AS foo"); + List> expected = Arrays.asList(incremental, batch); + checkAllPlans(sql, expected); + } + + @Test + public void testSimpleJoin() { + String sql = "SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x"; + String incrementalSql = "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1_prev AS bar1_prev\n" + + "INNER JOIN test.bar2_delta AS bar2_delta ON bar1_prev.x = bar2_delta.x\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.bar1_delta AS bar1_delta\n" + + "INNER JOIN test.bar2_prev AS bar2_prev ON bar1_delta.x = bar2_prev.x) AS t\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.bar1_delta AS bar1_delta0\n" + + "INNER JOIN test.bar2_delta AS bar2_delta0 ON bar1_delta0.x = bar2_delta0.x"; + List incremental = Arrays.asList(incrementalSql); + List batch = + Arrays.asList("SELECT *\n" + "FROM test.bar1 AS bar1\n" + "INNER JOIN test.bar2 AS bar2 ON bar1.x = bar2.x"); + List> expected = Arrays.asList(incremental, batch); + checkAllPlans(sql, expected); + } + + @Test + public void testNestedJoin() { + String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; + String Table0_delta = + "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha_prev AS alpha_prev\n" + + "INNER JOIN 
test.beta_delta AS beta_delta ON alpha_prev.a1 = beta_delta.b1\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM test.alpha_delta AS alpha_delta\n" + + "INNER JOIN test.beta_prev AS beta_prev ON alpha_delta.a1 = beta_prev.b1) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM test.alpha_delta AS alpha_delta0\n" + + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0"; + String Table1_delta = + "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0_prev AS Table#0_prev\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#0_prev.a2 = gamma_delta.g2\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma_prev AS gamma_prev ON Table#0_delta.a2 = gamma_prev.g2) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0"; + List combined = Arrays.asList(Table0_delta, + "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2"); + List incremental = Arrays.asList(Table0_delta, Table1_delta); + List batch = Arrays.asList("SELECT t.a2, gamma.g1\n" + "FROM (SELECT alpha.a1, alpha.a2\n" + + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1) AS t\n" + + "INNER JOIN test.gamma AS gamma ON t.a2 = gamma.g2"); + List> expected = Arrays.asList(combined, incremental, batch); + checkAllPlans(sql, expected); + } + + @Test + public void testThreeTablesJoin() { + String sql = + "SELECT a1, a2, g1 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1 JOIN test.gamma ON test.alpha.a2 = test.gamma.g2"; + String Table0_delta = "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha_prev AS alpha_prev\n" + + "INNER JOIN test.beta_delta AS beta_delta ON alpha_prev.a1 = beta_delta.b1\n" + "UNION ALL\n" + "SELECT *\n" + + "FROM test.alpha_delta AS alpha_delta\n" + + "INNER JOIN 
test.beta_prev AS beta_prev ON alpha_delta.a1 = beta_prev.b1) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM test.alpha_delta AS alpha_delta0\n" + + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1"; + String Table1_delta = + "SELECT t0.a1, t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0_prev AS Table#0_prev\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#0_prev.a2 = gamma_delta.g2\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma_prev AS gamma_prev ON Table#0_delta.a2 = gamma_prev.g2) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0"; + List combined = Arrays.asList(Table0_delta, + "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2"); + List incremental = Arrays.asList(Table0_delta, Table1_delta); + List batch = Arrays.asList("SELECT alpha.a1, alpha.a2, gamma.g1\n" + "FROM test.alpha AS alpha\n" + + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1\n" + + "INNER JOIN test.gamma AS gamma ON alpha.a2 = gamma.g2"); + List> expected = Arrays.asList(combined, incremental, batch); + checkAllPlans(sql, expected); + } + + @Test + public void testFourTablesJoin() { + String nestedJoin1 = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; + String nestedJoin2 = "SELECT a2, g1 FROM (" + nestedJoin1 + ") AS nj1 JOIN test.gamma ON nj1.a2 = test.gamma.g2"; + String sql = "SELECT g1, e2 FROM (" + nestedJoin2 + ") AS nj2 JOIN test.epsilon ON nj2.g1 = test.epsilon.e1"; + String Table0_delta = + "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha_prev AS alpha_prev\n" + + "INNER JOIN test.beta_delta AS beta_delta ON alpha_prev.a1 = beta_delta.b1\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM test.alpha_delta AS 
alpha_delta\n" + + "INNER JOIN test.beta_prev AS beta_prev ON alpha_delta.a1 = beta_prev.b1) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM test.alpha_delta AS alpha_delta0\n" + + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0"; + String Table1_delta = + "SELECT t0.a2, t0.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0_prev AS Table#0_prev\n" + + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#0_prev.a2 = gamma_delta.g2\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n" + + "INNER JOIN test.gamma_prev AS gamma_prev ON Table#0_delta.a2 = gamma_prev.g2) AS t\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta0\n" + + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0"; + String Table2_delta = + "SELECT t0.g1, t0.e2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#1_prev AS Table#1_prev\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta ON Table#1_prev.g1 = epsilon_delta.e1\n" + "UNION ALL\n" + + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta\n" + + "INNER JOIN test.epsilon_prev AS epsilon_prev ON Table#1_delta.g1 = epsilon_prev.e1) AS t\n" + + "UNION ALL\n" + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta0\n" + + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#1_delta0.g1 = epsilon_delta0.e1) AS t0"; + List combined1 = Arrays.asList(Table0_delta, + "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2", + "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1"); + List combined2 = Arrays.asList(Table0_delta, Table1_delta, + "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1"); + List incremental = Arrays.asList(Table0_delta, Table1_delta, Table2_delta); + List batch = Arrays + .asList("SELECT t0.g1, epsilon.e2\n" + "FROM (SELECT 
t.a2, gamma.g1\n" + "FROM (SELECT alpha.a1, alpha.a2\n" + + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1) AS t\n" + + "INNER JOIN test.gamma AS gamma ON t.a2 = gamma.g2) AS t0\n" + + "INNER JOIN test.epsilon AS epsilon ON t0.g1 = epsilon.e1"); + List> expected = Arrays.asList(combined1, combined2, incremental, batch); + checkAllPlans(sql, expected); + } +} diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java index 1395cc3c6..f8e2b4ea4 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java @@ -158,12 +158,14 @@ public void mytest1() { public void mytest2() { String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; String sql2 = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2"; - String sql = "SELECT a1, a2, g1 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1 JOIN test.gamma ON test.alpha.a2 = test.gamma.g2"; + String sql = + "SELECT a1, a2, g1 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1 JOIN test.gamma ON test.alpha.a2 = test.gamma.g2"; RelNode originalRelNode = hiveToRelConverter.convertSql(sql); RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer(); RelNode n2 = hiveToRelConverter.convertSql(sql2); transformer.generateIncrementalRelNodes(originalRelNode); } + @Test public void testNestedJoin() { String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1"; From 8bc08597c3130b67609cb17268fc200b9c79c96d Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Mon, 29 Jul 2024 14:44:27 -0700 Subject: [PATCH 12/21] doc: add java doc for some 
methods in plan generator --- .../RelNodeGenerationTransformer.java | 88 ++++++++++++++----- .../incremental/RelNodeGenerationTest.java | 12 +++ 2 files changed, 78 insertions(+), 22 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 3c0263e5f..6d2aa4aed 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -31,15 +31,14 @@ import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlNode; - -import com.linkedin.coral.transformers.CoralRelToSqlNodeConverter; public class RelNodeGenerationTransformer { private final String TABLE_NAME_PREFIX = "Table#"; private final String DELTA_SUFFIX = "_delta"; + private final String PREV_SUFFIX = "_prev"; + private RelOptSchema relOptSchema; private Map snapshotRelNodes; private Map deltaRelNodes; @@ -55,22 +54,13 @@ public RelNodeGenerationTransformer() { } public List> generateIncrementalRelNodes(RelNode relNode) { - getLevelRelation(relNode); + findJoinNeedsProject(relNode); relNode = uniformFormat(relNode); convertRelIncremental(relNode); - CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter(); Map snapshotRelNodes = getSnapshotRelNodes(); Map deltaRelNodes = getDeltaRelNodes(); List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes); combinedLists.add(Arrays.asList(relNode)); - for (List plan : combinedLists) { - for (RelNode node : plan) { - SqlNode sqlNode = converter.convert(node); - System.out.println(sqlNode.toSqlString(converter.INSTANCE).getSql()); - System.out.println(" "); - } - System.out.println("XXXXXXXXXX"); - } return combinedLists; } 
@@ -120,7 +110,14 @@ public Map getDeltaRelNodes() { return deterministicDeltaRelNodes; } - private void getLevelRelation(RelNode relNode) { + /** + * Traverses the relational algebra tree starting from the given RelNode. + * Identifies LogicalJoin nodes that may need a projection and adds them to the needsProj set. + * The traversal uses a custom RelShuttleImpl visitor that: + * - Checks if the input of LogicalJoin, LogicalFilter, LogicalUnion, and LogicalAggregate nodes is a LogicalJoin. + * - Recursively processes the inputs of RelNodes. + */ + private void findJoinNeedsProject(RelNode relNode) { RelShuttle converter = new RelShuttleImpl() { @Override @@ -134,8 +131,8 @@ public RelNode visit(LogicalJoin join) { needsProj.add(right); } - getLevelRelation(left); - getLevelRelation(right); + findJoinNeedsProject(left); + findJoinNeedsProject(right); return join; } @@ -145,14 +142,14 @@ public RelNode visit(LogicalFilter filter) { if (filter.getInput() instanceof LogicalJoin) { needsProj.add(filter.getInput()); } - getLevelRelation(filter.getInput()); + findJoinNeedsProject(filter.getInput()); return filter; } @Override public RelNode visit(LogicalProject project) { - getLevelRelation(project.getInput()); + findJoinNeedsProject(project.getInput()); return project; } @@ -163,7 +160,7 @@ public RelNode visit(LogicalUnion union) { if (child instanceof LogicalJoin) { needsProj.add(child); } - getLevelRelation(child); + findJoinNeedsProject(child); } return union; @@ -174,7 +171,7 @@ public RelNode visit(LogicalAggregate aggregate) { if (aggregate.getInput() instanceof LogicalJoin) { needsProj.add(aggregate.getInput()); } - getLevelRelation(aggregate.getInput()); + findJoinNeedsProject(aggregate.getInput()); return aggregate; } @@ -182,13 +179,28 @@ public RelNode visit(LogicalAggregate aggregate) { relNode.accept(converter); } - private RelNode convertRelPrev(RelNode originalNode) { + /** + * Converts the given relational algebra tree into its "previous" version by 
modifying TableScan nodes + * and transforming the structure of other relational nodes (such as LogicalJoin, LogicalFilter, LogicalProject, + * LogicalUnion, and LogicalAggregate). + * Specifically: + * - TableScan nodes are modified to point to a "_prev" version of the table. + * - Other RelNodes are recursively transformed to operate on their "previous" versions of their inputs. + *

+ * Example: + * SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x + *

+ * will be transformed to: + *

+ * SELECT * FROM test.bar1_prev JOIN test.bar2_prev ON test.bar1_prev.x = test.bar2_prev.x + */ + public RelNode convertRelPrev(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @Override public RelNode visit(TableScan scan) { RelOptTable originalTable = scan.getTable(); List incrementalNames = new ArrayList<>(originalTable.getQualifiedName()); - String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + "_prev"; + String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + PREV_SUFFIX; incrementalNames.add(deltaTableName); RelOptTable incrementalTable = RelOptTableImpl.create(originalTable.getRelOptSchema(), originalTable.getRowType(), incrementalNames, null); @@ -239,6 +251,38 @@ public RelNode visit(LogicalAggregate aggregate) { return originalNode.accept(converter); } + /** + * Transforms the given relational algebra tree to ensure a uniform format by recursively processing its nodes. + * This transformation involves: + * - For LogicalJoin nodes: recursively processing their children, and optionally creating a projection over the join + * if the join is in the needsProj set. (when the Join don't have a LogicalProject as its parent) + * - For other RelNodes: recursively processing their inputs to ensure uniformity. + * + * Example: + *

+   *            LogicalProject
+   *                  |
+   *            LogicalJoin
+   *             /        \
+   *      LogicalJoin    TableScan
+   *          /   \
+   *  TableScan  TableScan
+   * 
+ * + * will be transformed to: + *
+   *            LogicalProject
+   *                  |
+   *            LogicalJoin
+   *             /        \
+   *    LogicalProject   TableScan
+   *            |
+   *      LogicalJoin
+   *          /   \
+   *  TableScan  TableScan
+   * 
+ * + */ private RelNode uniformFormat(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java index 84fe6bb03..64e59b94f 100644 --- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java +++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java @@ -66,6 +66,18 @@ public List> getAllPlans(String sql) { return transformer.generateIncrementalRelNodes(originalRelNode); } + @Test + public void testSimpleJoinPrev() { + String sql = "SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x"; + RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer(); + RelNode originalRelNode = hiveToRelConverter.convertSql(sql); + RelNode prev = transformer.convertRelPrev(originalRelNode); + String prevSql = convert(prev); + String expected = "SELECT *\n" + "FROM test.bar1_prev AS bar1_prev\n" + + "INNER JOIN test.bar2_prev AS bar2_prev ON bar1_prev.x = bar2_prev.x"; + assertEquals(prevSql, expected); + } + @Test public void testSimpleSelectAll() { String sql = "SELECT * FROM test.foo"; From 4311d1b0cce24402468cc1edad8bddc10383bc60 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Mon, 29 Jul 2024 15:23:54 -0700 Subject: [PATCH 13/21] doc: more java doc --- .../RelNodeGenerationTransformer.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 6d2aa4aed..ce5afe7ea 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ 
b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -53,6 +53,18 @@ public RelNodeGenerationTransformer() { needsProj = new HashSet<>(); } + /** + * Generates incremental RelNodes for the given RelNode. The incremental RelNodes are generated by: + * - Identifying the LogicalJoin nodes that may need a projection and adding them to the needsProj set. + * - Uniformly formatting the RelNode by recursively processing its nodes. + * - Converting the RelNode into its incremental version by modifying TableScan nodes and transforming the structure + * of other relational nodes (such as LogicalJoin, LogicalFilter, LogicalProject, LogicalUnion, and LogicalAggregate). + * - Populating snapshotRelNodes and deltaRelNodes with the generated RelNodes. + * - Generating a list of lists of RelNodes that represent the incremental RelNodes in different combinations. + *

+ * @param relNode input RelNode to generate incremental RelNodes for + * @return a list of lists of RelNodes that represent the incremental RelNodes in different combinations + */ public List> generateIncrementalRelNodes(RelNode relNode) { findJoinNeedsProject(relNode); relNode = uniformFormat(relNode); @@ -64,6 +76,42 @@ public List> generateIncrementalRelNodes(RelNode relNode) { return combinedLists; } + /** + * Generates a list of lists of RelNodes that represent the incremental RelNodes in different combinations. + * The formula used to generate the combinations is as follows: + * - For n tables, there are n combinations. + * - For each combination, the first i tables are delta tables and the rest are snapshot tables. + * - The combinations are generated by iterating over the deltaRelNodes and snapshotRelNodes maps and adding the delta + * tables to the combination until the index i is reached, and then adding the snapshot tables to the combination. + *

+ * Example: + *

+   *            LogicalProject#8
+   *                  |
+   *            LogicalJoin#7
+   *             /        \
+   *    LogicalProject#4   TableScan#5
+   *            |
+   *      LogicalJoin#3
+   *          /   \
+   *  TableScan#0  TableScan#1
+   * 
+ * + * LogicalProject#4 and LogicalProject#8 are two sub-queries, and each sub-query will be materialized and replaced with a TableScan. + *

+ * LogicalProject#4 will be replaced with Table0 and LogicalProject#8 will be replaced with Table1. + * There will be 3 combinations: + *

+ * Incremental: [Table0_delta, Table1_delta], which means we use incremental view maintenance for both Joins. + *

+ * Part-Batch, Part-Incremental: [Table0_delta, Table1], which means we use incremental view maintenance for the first Join and snapshot for the second Join. + *

+ * Batch: [Table0, Table1], which means we use snapshot for both Joins. + *

+ * @param deltaRelNodes map of delta RelNodes + * @param snapshotRelNodes map of snapshot RelNodes + * @return a list of lists of RelNodes that represent the incremental RelNodes in different combinations + */ private List> generateCombinedLists(Map deltaRelNodes, Map snapshotRelNodes) { List> resultList = new ArrayList<>(); @@ -116,6 +164,8 @@ public Map getDeltaRelNodes() { * The traversal uses a custom RelShuttleImpl visitor that: * - Checks if the input of LogicalJoin, LogicalFilter, LogicalUnion, and LogicalAggregate nodes is a LogicalJoin. * - Recursively processes the inputs of RelNodes. + *

+ * @param relNode input RelNode to traverse */ private void findJoinNeedsProject(RelNode relNode) { RelShuttle converter = new RelShuttleImpl() { @@ -187,6 +237,8 @@ public RelNode visit(LogicalAggregate aggregate) { * - TableScan nodes are modified to point to a "_prev" version of the table. * - Other RelNodes are recursively transformed to operate on their "previous" versions of their inputs. *

+ * @param originalNode input RelNode to transform + *

* Example: * SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x *

@@ -257,6 +309,8 @@ public RelNode visit(LogicalAggregate aggregate) { * - For LogicalJoin nodes: recursively processing their children, and optionally creating a projection over the join * if the join is in the needsProj set. (when the Join don't have a LogicalProject as its parent) * - For other RelNodes: recursively processing their inputs to ensure uniformity. + *

+ * @param originalNode input RelNode to transform * * Example: *


From 4ed498a12b632bab676d8f7c028a08f7776a80f3 Mon Sep 17 00:00:00 2001
From: yyy1000 
Date: Mon, 29 Jul 2024 15:29:35 -0700
Subject: [PATCH 14/21] fix: remove unnecessary changes

---
 .../RelNodeIncrementalTransformer.java        | 155 +----------------
 .../RelToIncrementalSqlConverterTest.java     | 160 ++++--------------
 .../coralservice/utils/IncrementalUtils.java  |   2 +-
 .../utils/VisualizationUtils.java             |   2 +-
 4 files changed, 46 insertions(+), 273 deletions(-)

diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java
index 4a31cb539..5b59c11d1 100644
--- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java
+++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeIncrementalTransformer.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
+ * Copyright 2023 LinkedIn Corporation. All rights reserved.
  * Licensed under the BSD-2 Clause license.
  * See LICENSE in the project root for license information.
  */
@@ -7,14 +7,10 @@
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.LinkedHashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
-import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.prepare.RelOptTableImpl;
 import org.apache.calcite.rel.RelNode;
@@ -33,63 +29,16 @@
 
 public class RelNodeIncrementalTransformer {
 
-  private final String TABLE_NAME_PREFIX = "Table#";
-  private final String DELTA_SUFFIX = "_delta";
-
-  private RelOptSchema relOptSchema;
-  private Map snapshotRelNodes;
-  private Map deltaRelNodes;
-  private RelNode tempLastRelNode;
-
-  public RelNodeIncrementalTransformer() {
-    relOptSchema = null;
-    snapshotRelNodes = new LinkedHashMap<>();
-    deltaRelNodes = new LinkedHashMap<>();
-    tempLastRelNode = null;
+  private RelNodeIncrementalTransformer() {
   }
 
-  /**
-   * Returns snapshotRelNodes with deterministic keys.
-   */
-  public Map getSnapshotRelNodes() {
-    Map deterministicSnapshotRelNodes = new LinkedHashMap<>();
-    for (String description : snapshotRelNodes.keySet()) {
-      deterministicSnapshotRelNodes.put(getDeterministicDescriptionFromDescription(description, false),
-          snapshotRelNodes.get(description));
-    }
-    return deterministicSnapshotRelNodes;
-  }
-
-  /**
-   * Returns deltaRelNodes with deterministic keys.
-   */
-  public Map getDeltaRelNodes() {
-    Map deterministicDeltaRelNodes = new LinkedHashMap<>();
-    for (String description : deltaRelNodes.keySet()) {
-      deterministicDeltaRelNodes.put(getDeterministicDescriptionFromDescription(description, true),
-          deltaRelNodes.get(description));
-    }
-    return deterministicDeltaRelNodes;
-  }
-
-  /**
-   * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes.
-   * @param originalNode input RelNode to generate an incremental version for.
-   */
-  public RelNode convertRelIncremental(RelNode originalNode) {
+  public static RelNode convertRelIncremental(RelNode originalNode) {
     RelShuttle converter = new RelShuttleImpl() {
       @Override
       public RelNode visit(TableScan scan) {
         RelOptTable originalTable = scan.getTable();
-
-        // Set RelNodeIncrementalTransformer class relOptSchema if not already set
-        if (relOptSchema == null) {
-          relOptSchema = originalTable.getRelOptSchema();
-        }
-
-        // Create delta scan
         List incrementalNames = new ArrayList<>(originalTable.getQualifiedName());
-        String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + DELTA_SUFFIX;
+        String deltaTableName = incrementalNames.remove(incrementalNames.size() - 1) + "_delta";
         incrementalNames.add(deltaTableName);
         RelOptTable incrementalTable =
             RelOptTableImpl.create(originalTable.getRelOptSchema(), originalTable.getRowType(), incrementalNames, null);
@@ -105,34 +54,12 @@ public RelNode visit(LogicalJoin join) {
 
         RexBuilder rexBuilder = join.getCluster().getRexBuilder();
 
-        // Check if we can replace the left and right nodes with a scan of a materialized table
-        String leftDescription = getDescriptionFromRelNode(left, false);
-        String leftIncrementalDescription = getDescriptionFromRelNode(left, true);
-        if (snapshotRelNodes.containsKey(leftDescription)) {
-          left =
-              susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(leftDescription, false), left);
-          incrementalLeft = susbstituteWithMaterializedView(
-              getDeterministicDescriptionFromDescription(leftIncrementalDescription, true), incrementalLeft);
-        }
-        String rightDescription = getDescriptionFromRelNode(right, false);
-        String rightIncrementalDescription = getDescriptionFromRelNode(right, true);
-        if (snapshotRelNodes.containsKey(rightDescription)) {
-          right = susbstituteWithMaterializedView(getDeterministicDescriptionFromDescription(rightDescription, false),
-              right);
-          incrementalRight = susbstituteWithMaterializedView(
-              getDeterministicDescriptionFromDescription(rightIncrementalDescription, true), incrementalRight);
-        }
-
-        // We need to do this in the join to get potentially updated left and right nodes
-        tempLastRelNode = createProjectOverJoin(join, left, right, rexBuilder);
-
         LogicalProject p1 = createProjectOverJoin(join, left, incrementalRight, rexBuilder);
         LogicalProject p2 = createProjectOverJoin(join, incrementalLeft, right, rexBuilder);
         LogicalProject p3 = createProjectOverJoin(join, incrementalLeft, incrementalRight, rexBuilder);
 
         LogicalUnion unionAllJoins =
             LogicalUnion.create(Arrays.asList(LogicalUnion.create(Arrays.asList(p1, p2), true), p3), true);
-
         return unionAllJoins;
       }
 
@@ -145,16 +72,7 @@ public RelNode visit(LogicalFilter filter) {
       @Override
       public RelNode visit(LogicalProject project) {
         RelNode transformedChild = convertRelIncremental(project.getInput());
-        RelNode materializedProject = getTempLastRelNode();
-        if (materializedProject != null) {
-          snapshotRelNodes.put(getDescriptionFromRelNode(project, false), materializedProject);
-        } else {
-          snapshotRelNodes.put(getDescriptionFromRelNode(project, false), project);
-        }
-        LogicalProject transformedProject =
-            LogicalProject.create(transformedChild, project.getProjects(), project.getRowType());
-        deltaRelNodes.put(getDescriptionFromRelNode(project, true), transformedProject);
-        return transformedProject;
+        return LogicalProject.create(transformedChild, project.getProjects(), project.getRowType());
       }
 
       @Override
@@ -175,67 +93,8 @@ public RelNode visit(LogicalAggregate aggregate) {
     return originalNode.accept(converter);
   }
 
-  /**
-   * Returns the tempLastRelNode and sets the variable back to null. Should only be called once for each retrieval
-   * instance since subsequent consecutive calls will yield null.
-   */
-  private RelNode getTempLastRelNode() {
-    RelNode currentTempLastRelNode = tempLastRelNode;
-    tempLastRelNode = null;
-    return currentTempLastRelNode;
-  }
-
-  /**
-   * Returns the corresponding description for a given RelNode by extracting the identifier (ex. the identifier for
-   * LogicalProject#22 is 22) and prepending the TABLE_NAME_PREFIX. Depending on the delta value, a delta suffix may be
-   * appended.
-   * @param relNode RelNode from which the identifier will be retrieved.
-   * @param delta configure whether to get the delta name
-   */
-  private String getDescriptionFromRelNode(RelNode relNode, boolean delta) {
-    String identifier = relNode.getDescription().split("#")[1];
-    String description = TABLE_NAME_PREFIX + identifier;
-    if (delta) {
-      return description + DELTA_SUFFIX;
-    }
-    return description;
-  }
-
-  /**
-   * Returns a description based on mapping index order that will stay the same across different runs of the same
-   * query. The description consists of the table prefix, the index, and optionally, the delta suffix.
-   * @param description output from calling getDescriptionFromRelNode()
-   * @param delta configure whether to get the delta name
-   */
-  private String getDeterministicDescriptionFromDescription(String description, boolean delta) {
-    if (delta) {
-      List deltaKeyOrdering = new ArrayList<>(deltaRelNodes.keySet());
-      return TABLE_NAME_PREFIX + deltaKeyOrdering.indexOf(description) + DELTA_SUFFIX;
-    } else {
-      List snapshotKeyOrdering = new ArrayList<>(snapshotRelNodes.keySet());
-      return TABLE_NAME_PREFIX + snapshotKeyOrdering.indexOf(description);
-    }
-  }
-
-  /**
-   * Accepts a table name and RelNode and creates a TableScan over the RelNode using the class relOptSchema.
-   * @param relOptTableName table name corresponding to table to scan over
-   * @param relNode top-level RelNode that will be replaced with the TableScan
-   */
-  private TableScan susbstituteWithMaterializedView(String relOptTableName, RelNode relNode) {
-    RelOptTable table =
-        RelOptTableImpl.create(relOptSchema, relNode.getRowType(), Collections.singletonList(relOptTableName), null);
-    return LogicalTableScan.create(relNode.getCluster(), table);
-  }
-
-  /** Creates a LogicalProject whose input is an incremental LogicalJoin node that is constructed from a left and right
-   * RelNode and LogicalJoin.
-   * @param join LogicalJoin to create the incremental join from
-   * @param left left RelNode child of the incremental join
-   * @param right right RelNode child of the incremental join
-   * @param rexBuilder RexBuilder for LogicalProject creation
-   */
-  private LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right, RexBuilder rexBuilder) {
+  private static LogicalProject createProjectOverJoin(LogicalJoin join, RelNode left, RelNode right,
+      RexBuilder rexBuilder) {
     LogicalJoin incrementalJoin =
         LogicalJoin.create(left, right, join.getCondition(), join.getVariablesSet(), join.getJoinType());
     ArrayList projects = new ArrayList<>();
diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java
index f8e2b4ea4..3ac0cd683 100644
--- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java
+++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelToIncrementalSqlConverterTest.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
+ * Copyright 2023 LinkedIn Corporation. All rights reserved.
  * Licensed under the BSD-2 Clause license.
  * See LICENSE in the project root for license information.
  */
@@ -7,9 +7,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
 
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.sql.SqlNode;
@@ -44,8 +41,7 @@ public void afterClass() throws IOException {
   }
 
   public String convert(RelNode relNode) {
-    RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer();
-    RelNode incrementalRelNode = transformer.convertRelIncremental(relNode);
+    RelNode incrementalRelNode = RelNodeIncrementalTransformer.convertRelIncremental(relNode);
     CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter();
     SqlNode sqlNode = converter.convert(incrementalRelNode);
     return sqlNode.toSqlString(converter.INSTANCE).getSql();
@@ -56,28 +52,6 @@ public String getIncrementalModification(String sql) {
     return convert(originalRelNode);
   }
 
-  public void checkAllSnapshotAndDeltaQueries(String sql, Map snapshotExpected,
-      Map deltaExpected) {
-    RelNode originalRelNode = hiveToRelConverter.convertSql(sql);
-    CoralRelToSqlNodeConverter converter = new CoralRelToSqlNodeConverter();
-    RelNodeIncrementalTransformer transformer = new RelNodeIncrementalTransformer();
-    transformer.convertRelIncremental(originalRelNode);
-    Map snapshotRelNodes = transformer.getSnapshotRelNodes();
-    Map deltaRelNodes = transformer.getDeltaRelNodes();
-    for (String key : snapshotRelNodes.keySet()) {
-      RelNode actualSnapshotRelNode = snapshotRelNodes.get(key);
-      SqlNode sqlNode = converter.convert(actualSnapshotRelNode);
-      String actualSql = sqlNode.toSqlString(converter.INSTANCE).getSql();
-      assertEquals(actualSql, snapshotExpected.get(key));
-    }
-    for (String key : deltaRelNodes.keySet()) {
-      RelNode actualDeltaRelNode = deltaRelNodes.get(key);
-      SqlNode sqlNode = converter.convert(actualDeltaRelNode);
-      String actualSql = sqlNode.toSqlString(converter.INSTANCE).getSql();
-      assertEquals(actualSql, deltaExpected.get(key));
-    }
-  }
-
   @Test
   public void testSimpleSelectAll() {
     String sql = "SELECT * FROM test.foo";
@@ -107,6 +81,41 @@ public void testJoinWithFilter() {
     assertEquals(getIncrementalModification(sql), expected);
   }
 
+  @Test
+  public void testJoinWithNestedFilter() {
+    String sql =
+        "WITH tmp AS (SELECT * from test.bar1 WHERE test.bar1.x > 10), tmp2 AS (SELECT * from test.bar2) SELECT * FROM tmp JOIN tmp2 ON tmp.x = tmp2.x";
+    String expected = "SELECT *\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar1\n"
+        + "WHERE bar1.x > 10) AS t\n" + "INNER JOIN test.bar2_delta AS bar2_delta ON t.x = bar2_delta.x\n"
+        + "UNION ALL\n" + "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1_delta AS bar1_delta\n"
+        + "WHERE bar1_delta.x > 10) AS t0\n" + "INNER JOIN test.bar2 AS bar2 ON t0.x = bar2.x) AS t1\n" + "UNION ALL\n"
+        + "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1_delta AS bar1_delta0\n"
+        + "WHERE bar1_delta0.x > 10) AS t2\n" + "INNER JOIN test.bar2_delta AS bar2_delta0 ON t2.x = bar2_delta0.x";
+    assertEquals(getIncrementalModification(sql), expected);
+  }
+
+  @Test
+  public void testNestedJoin() {
+    String sql =
+        "WITH tmp AS (SELECT * FROM test.bar1 INNER JOIN test.bar2 ON test.bar1.x = test.bar2.x) SELECT * FROM tmp INNER JOIN test.bar3 ON tmp.x = test.bar3.x";
+    String expected = "SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar1\n"
+        + "INNER JOIN test.bar2 AS bar2 ON bar1.x = bar2.x\n"
+        + "INNER JOIN test.bar3_delta AS bar3_delta ON bar1.x = bar3_delta.x\n" + "UNION ALL\n" + "SELECT *\n"
+        + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.bar1 AS bar10\n"
+        + "INNER JOIN test.bar2_delta AS bar2_delta ON bar10.x = bar2_delta.x\n" + "UNION ALL\n" + "SELECT *\n"
+        + "FROM test.bar1_delta AS bar1_delta\n" + "INNER JOIN test.bar2 AS bar20 ON bar1_delta.x = bar20.x) AS t\n"
+        + "UNION ALL\n" + "SELECT *\n" + "FROM test.bar1_delta AS bar1_delta0\n"
+        + "INNER JOIN test.bar2_delta AS bar2_delta0 ON bar1_delta0.x = bar2_delta0.x) AS t0\n"
+        + "INNER JOIN test.bar3 AS bar3 ON t0.x = bar3.x) AS t1\n" + "UNION ALL\n" + "SELECT *\n" + "FROM (SELECT *\n"
+        + "FROM (SELECT *\n" + "FROM test.bar1 AS bar11\n"
+        + "INNER JOIN test.bar2_delta AS bar2_delta1 ON bar11.x = bar2_delta1.x\n" + "UNION ALL\n" + "SELECT *\n"
+        + "FROM test.bar1_delta AS bar1_delta1\n" + "INNER JOIN test.bar2 AS bar21 ON bar1_delta1.x = bar21.x) AS t2\n"
+        + "UNION ALL\n" + "SELECT *\n" + "FROM test.bar1_delta AS bar1_delta2\n"
+        + "INNER JOIN test.bar2_delta AS bar2_delta2 ON bar1_delta2.x = bar2_delta2.x) AS t3\n"
+        + "INNER JOIN test.bar3_delta AS bar3_delta0 ON t3.x = bar3_delta0.x";
+    assertEquals(getIncrementalModification(sql), expected);
+  }
+
   @Test
   public void testUnion() {
     String sql = "SELECT * FROM test.bar1 UNION SELECT * FROM test.bar2 UNION SELECT * FROM test.bar3";
@@ -134,99 +143,4 @@ public void testSelectSpecificJoin() {
         + "INNER JOIN test.bar2_delta AS bar2_delta0 ON bar1_delta0.x = bar2_delta0.x) AS t0";
     assertEquals(getIncrementalModification(sql), expected);
   }
-
-  @Test
-  public void testSimpleJ() {
-    String sql = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1";
-    RelNode originalRelNode = hiveToRelConverter.convertSql(sql);
-    RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer();
-    List> nodes = transformer.generateIncrementalRelNodes(originalRelNode);
-    assertEquals(nodes.size(), 2);
-  }
-
-  @Test
-  public void mytest1() {
-    String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1";
-    String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2";
-    RelNode originalRelNode = hiveToRelConverter.convertSql(sql);
-    RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer();
-    List> nodes = transformer.generateIncrementalRelNodes(originalRelNode);
-    assertEquals(nodes.size(), 3);
-  }
-
-  @Test
-  public void mytest2() {
-    String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1";
-    String sql2 = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2";
-    String sql =
-        "SELECT a1, a2, g1 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1 JOIN test.gamma ON test.alpha.a2 = test.gamma.g2";
-    RelNode originalRelNode = hiveToRelConverter.convertSql(sql);
-    RelNodeGenerationTransformer transformer = new RelNodeGenerationTransformer();
-    RelNode n2 = hiveToRelConverter.convertSql(sql2);
-    transformer.generateIncrementalRelNodes(originalRelNode);
-  }
-
-  @Test
-  public void testNestedJoin() {
-    String nestedJoin = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1";
-    String sql = "SELECT a2, g1 FROM (" + nestedJoin + ") AS nj JOIN test.gamma ON nj.a2 = test.gamma.g2";
-    Map snapshotExpected = new LinkedHashMap<>();
-    snapshotExpected.put("Table#0",
-        "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1");
-    snapshotExpected.put("Table#1",
-        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2");
-    Map deltaExpected = new LinkedHashMap<>();
-    deltaExpected.put("Table#0_delta",
-        "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha AS alpha0\n"
-            + "INNER JOIN test.beta_delta AS beta_delta ON alpha0.a1 = beta_delta.b1\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM test.alpha_delta AS alpha_delta\n"
-            + "INNER JOIN test.beta AS beta0 ON alpha_delta.a1 = beta0.b1) AS t\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM test.alpha_delta AS alpha_delta0\n"
-            + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0");
-    deltaExpected.put("Table#1_delta",
-        "SELECT t3.a2, t3.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#00\n"
-            + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#00.a2 = gamma_delta.g2\n" + "UNION ALL\n"
-            + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n"
-            + "INNER JOIN test.gamma AS gamma0 ON Table#0_delta.a2 = gamma0.g2) AS t2\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM Table#0_delta AS Table#0_delta0\n"
-            + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t3");
-    checkAllSnapshotAndDeltaQueries(sql, snapshotExpected, deltaExpected);
-  }
-
-  @Test
-  public void testThreeNestedJoins() {
-    String nestedJoin1 = "SELECT a1, a2 FROM test.alpha JOIN test.beta ON test.alpha.a1 = test.beta.b1";
-    String nestedJoin2 = "SELECT a2, g1 FROM (" + nestedJoin1 + ") AS nj1 JOIN test.gamma ON nj1.a2 = test.gamma.g2";
-    String sql = "SELECT g1, e2 FROM (" + nestedJoin2 + ") AS nj2 JOIN test.epsilon ON nj2.g1 = test.epsilon.e1";
-    Map snapshotExpected = new LinkedHashMap<>();
-    snapshotExpected.put("Table#0",
-        "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1");
-    snapshotExpected.put("Table#1",
-        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2");
-    snapshotExpected.put("Table#2",
-        "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1");
-    Map deltaExpected = new LinkedHashMap<>();
-    deltaExpected.put("Table#0_delta",
-        "SELECT t0.a1, t0.a2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM test.alpha AS alpha0\n"
-            + "INNER JOIN test.beta_delta AS beta_delta ON alpha0.a1 = beta_delta.b1\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM test.alpha_delta AS alpha_delta\n"
-            + "INNER JOIN test.beta AS beta0 ON alpha_delta.a1 = beta0.b1) AS t\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM test.alpha_delta AS alpha_delta0\n"
-            + "INNER JOIN test.beta_delta AS beta_delta0 ON alpha_delta0.a1 = beta_delta0.b1) AS t0");
-    deltaExpected.put("Table#1_delta",
-        "SELECT t3.a2, t3.g1\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#0 AS Table#00\n"
-            + "INNER JOIN test.gamma_delta AS gamma_delta ON Table#00.a2 = gamma_delta.g2\n" + "UNION ALL\n"
-            + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta\n"
-            + "INNER JOIN test.gamma AS gamma0 ON Table#0_delta.a2 = gamma0.g2) AS t2\n" + "UNION ALL\n" + "SELECT *\n"
-            + "FROM Table#0_delta AS Table#0_delta0\n"
-            + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t3");
-    deltaExpected.put("Table#2_delta",
-        "SELECT t6.g1, t6.e2\n" + "FROM (SELECT *\n" + "FROM (SELECT *\n" + "FROM Table#1 AS Table#10\n"
-            + "INNER JOIN test.epsilon_delta AS epsilon_delta ON Table#10.g1 = epsilon_delta.e1\n" + "UNION ALL\n"
-            + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta\n"
-            + "INNER JOIN test.epsilon AS epsilon0 ON Table#1_delta.g1 = epsilon0.e1) AS t5\n" + "UNION ALL\n"
-            + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta0\n"
-            + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#1_delta0.g1 = epsilon_delta0.e1) AS t6");
-    checkAllSnapshotAndDeltaQueries(sql, snapshotExpected, deltaExpected);
-  }
 }
diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
index 6f931309c..42d194adc 100644
--- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
+++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
@@ -30,7 +30,7 @@ public static String getIncrementalQuery(String query, String sourceLanguage, St
         break;
     }
 
-    RelNode incrementalRelNode = new RelNodeIncrementalTransformer().convertRelIncremental(originalNode);
+    RelNode incrementalRelNode = RelNodeIncrementalTransformer.convertRelIncremental(originalNode);
 
     switch (targetLanguage.toLowerCase()) {
       case "trino":
diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
index 4655801b4..ec3117df6 100644
--- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
+++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
@@ -46,7 +46,7 @@ public ArrayList generateIRVisualizations(String query, String sourceLangu
       // Pass in pre-rewrite rel node
       switch (rewriteType) {
         case INCREMENTAL:
-          postRewriteRelNode = new RelNodeIncrementalTransformer().convertRelIncremental(relNode);
+          postRewriteRelNode = RelNodeIncrementalTransformer.convertRelIncremental(relNode);
           break;
         case DATAMASKING:
         default:

From 2d638fa3b27227eecace8977ec1062a8cedb8a65 Mon Sep 17 00:00:00 2001
From: yyy1000 
Date: Mon, 29 Jul 2024 15:32:05 -0700
Subject: [PATCH 15/21] fix: remove unnecessary changes

---
 .../linkedin/coral/coralservice/utils/IncrementalUtils.java    | 3 +--
 .../linkedin/coral/coralservice/utils/VisualizationUtils.java  | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
index 42d194adc..3ee6687f7 100644
--- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
+++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
+ * Copyright 2023 LinkedIn Corporation. All rights reserved.
  * Licensed under the BSD-2 Clause license.
  * See LICENSE in the project root for license information.
  */
@@ -40,7 +40,6 @@ public static String getIncrementalQuery(String query, String sourceLanguage, St
         CoralSpark coralSpark = CoralSpark.create(incrementalRelNode, hiveMetastoreClient);
         return coralSpark.getSparkSql();
     }
-
   }
 
 }
diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
index ec3117df6..a3b91bd94 100644
--- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
+++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2022-2024 LinkedIn Corporation. All rights reserved.
+ * Copyright 2022-2023 LinkedIn Corporation. All rights reserved.
  * Licensed under the BSD-2 Clause license.
  * See LICENSE in the project root for license information.
  */

From 0fef86c54aa137ef90cef67d5b3b23c2ab823946 Mon Sep 17 00:00:00 2001
From: yyy1000 
Date: Thu, 1 Aug 2024 09:12:46 -0700
Subject: [PATCH 16/21] doc: update java doc

---
 .../coral/incremental/RelNodeGenerationTransformer.java     | 6 +++---
 coral-service/frontend/env.local                            | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 coral-service/frontend/env.local

diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
index ce5afe7ea..61f45d658 100644
--- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
+++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
@@ -102,11 +102,11 @@ public List> generateIncrementalRelNodes(RelNode relNode) {
    * LogicalProject#4 will be replaced with Table0 and LogicalProject#7 will be replaced with Table1.
    * There will be 3 combinations:
    * 

- * Incremental: [Table0_delta, Table1_delta], which means we use incremental view maintenance for both Joins. + * Incremental: [Table0_delta, Table1_delta], which means incremental execution for both Joins. *

- * Part-Batch, Part-Incremental: [Table0_delta, Table1], which means we use incremental view maintenance for the first Join and snapshot for the second Join. + * Part-Batch, Part-Incremental: [Table0_delta, Table1], which means incremental execution for the first Join and batch execution for the second Join. *

- * Batch: [Table0, Table1], which means we use snapshot for both Joins. + * Batch: [Table0, Table1], which means batch execution for both Joins. *

* @param deltaRelNodes map of delta RelNodes * @param snapshotRelNodes map of snapshot RelNodes diff --git a/coral-service/frontend/env.local b/coral-service/frontend/env.local new file mode 100644 index 000000000..89293e582 --- /dev/null +++ b/coral-service/frontend/env.local @@ -0,0 +1,2 @@ +# Base URL of the Coral Service API, default is http://localhost:8080 +NEXT_PUBLIC_CORAL_SERVICE_API_URL="http://localhost:8080" From dbc8d446b2721414cf42e5f9fa5036284dac2558 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Thu, 1 Aug 2024 09:21:37 -0700 Subject: [PATCH 17/21] docs: update java doc --- .../coral/incremental/RelNodeGenerationTransformer.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 61f45d658..e7a06010a 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -83,8 +83,10 @@ public List> generateIncrementalRelNodes(RelNode relNode) { * - For each combination, the first i tables are delta tables and the rest are snapshot tables. * - The combinations are generated by iterating over the deltaRelNodes and snapshotRelNodes maps and adding the delta * tables to the combination until the index i is reached, and then adding the snapshot tables to the combination. + * That means each generated plan would be a combination of incremental plan and batch plan, consisting of + * a List of RelNodes, denoting each sub-query will be incremental executed or batch executed. *

- * Example: + * Take the following three-tables Join as an example: *

    *            LogicalProject#8
    *                  |

From a990bd8afbfcc05b14e126e71394fa935c48728a Mon Sep 17 00:00:00 2001
From: yyy1000 
Date: Fri, 2 Aug 2024 14:29:49 -0700
Subject: [PATCH 18/21] docs: update java doc

---
 .../coral/incremental/RelNodeGenerationTransformer.java   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
index e7a06010a..1b74347f3 100644
--- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
+++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
@@ -79,7 +79,7 @@ public List> generateIncrementalRelNodes(RelNode relNode) {
   /**
    * Generates a list of lists of RelNodes that represent the incremental RelNodes in different combinations.
    * The formula used to generate the combinations is as follows:
-   * - For n tables, there are n combinations.
+   * - For n sub-queries, there are n combinations.
    * - For each combination, the first i tables are delta tables and the rest are snapshot tables.
    * - The combinations are generated by iterating over the deltaRelNodes and snapshotRelNodes maps and adding the delta
    *  tables to the combination until the index i is reached, and then adding the snapshot tables to the combination.
@@ -104,11 +104,11 @@ public List> generateIncrementalRelNodes(RelNode relNode) {
    * LogicalProject#4 will be replaced with Table0 and LogicalProject#7 will be replaced with Table1.
    * There will be 3 combinations:
    * 

- * Incremental: [Table0_delta, Table1_delta], which means incremental execution for both Joins. + * Incremental: [Table0_delta, Table1_delta], which means both joins are executed incrementally. *

- * Part-Batch, Part-Incremental: [Table0_delta, Table1], which means incremental execution for the first Join and batch execution for the second Join. + * Part-Batch, Part-Incremental: [Table0_delta, Table1], which means the first join is executed incrementally, and the second join is executed in batch mode. *

- * Batch: [Table0, Table1], which means batch execution for both Joins. + * Batch: [Table0, Table1], which means both joins are executed in batch mode. *

* @param deltaRelNodes map of delta RelNodes * @param snapshotRelNodes map of snapshot RelNodes From de299128c28d548b5b4cc20b016100218f658489 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Tue, 6 Aug 2024 09:44:09 -0700 Subject: [PATCH 19/21] docs: add input and output to java doc --- .../RelNodeGenerationTransformer.java | 183 ++++++++++++++---- 1 file changed, 143 insertions(+), 40 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 1b74347f3..2ec76257e 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -57,9 +57,7 @@ public RelNodeGenerationTransformer() { * Generates incremental RelNodes for the given RelNode. The incremental RelNodes are generated by: * - Identifying the LogicalJoin nodes that may need a projection and adding them to the needsProj set. * - Uniformly formatting the RelNode by recursively processing its nodes. - * - Converting the RelNode into its incremental version by modifying TableScan nodes and transforming the structure - * of other relational nodes (such as LogicalJoin, LogicalFilter, LogicalProject, LogicalUnion, and LogicalAggregate). - * - Populating snapshotRelNodes and deltaRelNodes with the generated RelNodes. + * - When converting the RelNode into its incremental version, populating snapshotRelNodes and deltaRelNodes with the generated RelNodes. * - Generating a list of lists of RelNodes that represent the incremental RelNodes in different combinations. *

* @param relNode input RelNode to generate incremental RelNodes for @@ -95,13 +93,40 @@ public List> generateIncrementalRelNodes(RelNode relNode) { * LogicalProject#4 TableScan#5 * | * LogicalJoin#3 - * / \ - * TableScan#0 TableScan#1 + * / \ + * TableScan(Table_A) TableScan(Table_B) + *

+ * + * LogicalProject#4 and LogicalProject#8 are two sub-queries, and each sub-query will be materialized and replaced with a TableScan. + * LogicalProject#4 will be replaced with Table0 and LogicalProject#8 will be replaced with Table1 like below. + *
+   *    LogicalProject#4
+   *            |
+   *      LogicalJoin#3            =>          TableScan(Table0)
+   *          /        \
+   * TableScan(Table_A)  TableScan(Table_B)
+   * 
+ *
+   *            LogicalProject#8
+   *                  |
+   *            LogicalJoin#7
+   *             /        \
+   *    LogicalProject#4   TableScan#5
+   *            |
+   *      LogicalJoin#3
+   *          /         \
+   * TableScan(Table_A)  TableScan(Table_B)
+   * 
+ * will be replaced with + *
+   *            LogicalProject#8
+   *                  |
+   *            LogicalJoin#7      =>        TableScan(Table1)
+   *             /        \
+   *    TableScan(Table0)   TableScan#5
    * 
* - * LogicalProject#4 and LogicalProject#7 are two sub-queries, and each sub-query will be materialized and replaced with a TableScan. *

- * LogicalProject#4 will be replaced with Table0 and LogicalProject#7 will be replaced with Table1. * There will be 3 combinations: *

* Incremental: [Table0_delta, Table1_delta], which means both joins are executed incrementally. @@ -162,10 +187,38 @@ public Map getDeltaRelNodes() { /** * Traverses the relational algebra tree starting from the given RelNode. - * Identifies LogicalJoin nodes that may need a projection and adds them to the needsProj set. + * Identifies LogicalJoin nodes that needs a projection and adds them to the needsProj set. * The traversal uses a custom RelShuttleImpl visitor that: - * - Checks if the input of LogicalJoin, LogicalFilter, LogicalUnion, and LogicalAggregate nodes is a LogicalJoin. - * - Recursively processes the inputs of RelNodes. + * - Recursively processing when the RelNode is not a LogicalProject and check its inputs. + * - If one input node is LogicalJoin, the LogicalJoin node is added to the needsProj set. + *

+ * For example, consider the following queries for a two-tables Join + *

+ * Input1: + *

+   *               LogicalProject
+   *                     |
+   *               LogicalJoin
+   *                 /        \
+   *         LogicalJoin(*)    TableScan(Table_C)
+   *           /          \
+   *  TableScan(Table_A)  TableScan(Table_B)
+   * 
+ * LogicalJoin(*) is a LogicalJoin node that doesn't have a LogicalProject parent, so it is added to the needsProj set. + *

+ * Input2: + *

+   *               LogicalProject
+   *                     |
+   *               LogicalJoin
+   *                 /        \
+   *      LogicalProject  TableScan(Table_C)
+   *                |
+   *          LogicalJoin
+   *          /          \
+   *  TableScan(Table_A)  TableScan(Table_B)
+   * 
+ * In this case, all LogicalJoin nodes have a LogicalProject parent, so no one is added to the needsProj set. *

* @param relNode input RelNode to traverse */ @@ -239,14 +292,37 @@ public RelNode visit(LogicalAggregate aggregate) { * - TableScan nodes are modified to point to a "_prev" version of the table. * - Other RelNodes are recursively transformed to operate on their "previous" versions of their inputs. *

- * @param originalNode input RelNode to transform + * For example the following query for a two tables Join: + * Input: + *

+   *            LogicalProject
+   *                  |
+   *            LogicalJoin
+   *             /        \
+   *      TableScan#A    TableScan#B
+   * 
+ * + * Output: + *
+   *            LogicalProject
+   *                  |
+   *            LogicalJoin
+   *             /        \
+   *   TableScan#A_prev    TableScan#B_prev
+   * 
+ * In SQL view, the transformation is: + *

+ * {@code + * SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x} *

- * Example: - * SELECT * FROM test.bar1 JOIN test.bar2 ON test.bar1.x = test.bar2.x + * to *

- * will be transformed to: + * {@code + * SELECT * FROM test.bar1_prev JOIN test.bar2_prev ON test.bar1_prev.x = test.bar2_prev.x} + * + *

+ * @param originalNode input RelNode to transform *

- * SELECT * FROM test.bar1_prev JOIN test.bar2_prev ON test.bar1_prev.x = test.bar2_prev.x */ public RelNode convertRelPrev(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @@ -306,38 +382,40 @@ public RelNode visit(LogicalAggregate aggregate) { } /** - * Transforms the given relational algebra tree to ensure a uniform format by recursively processing its nodes. + * Transforms the given relational algebra tree to ensure a uniform format that each LogicalJoin has a LogicalProject as its parent. * This transformation involves: * - For LogicalJoin nodes: recursively processing their children, and optionally creating a projection over the join * if the join is in the needsProj set. (when the Join don't have a LogicalProject as its parent) - * - For other RelNodes: recursively processing their inputs to ensure uniformity. + * - For other type RelNode: recursively processing its inputs, and using the transformed children as its new inputs. *

- * @param originalNode input RelNode to transform - * - * Example: + * Here is an example of how the uniformFormat method works for a three-tables join query + *

+ * Input: *

-   *            LogicalProject
-   *                  |
-   *            LogicalJoin
-   *             /        \
-   *      LogicalJoin    TableScan
-   *          /   \
-   *  TableScan  TableScan
+   *               LogicalProject
+   *                     |
+   *               LogicalJoin
+   *                 /        \
+   *           LogicalJoin    TableScan(Table_C)
+   *           /          \
+   *  TableScan(Table_A)  TableScan(Table_B)
    * 
* - * will be transformed to: + * + * Output: *
-   *            LogicalProject
-   *                  |
-   *            LogicalJoin
-   *             /        \
-   *    LogicalProject   TableScan
-   *            |
-   *      LogicalJoin
-   *          /   \
-   *  TableScan  TableScan
+   *               LogicalProject
+   *                     |
+   *               LogicalJoin
+   *                 /        \
+   *      LogicalProject(*)   TableScan(Table_C)
+   *                |
+   *          LogicalJoin
+   *          /          \
+   *  TableScan(Table_A)  TableScan(Table_B)
    * 
- * + * The LogicalProject(*) is added to ensure the uniform format. + * @param originalNode input RelNode to transform */ private RelNode uniformFormat(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { @@ -388,8 +466,33 @@ public RelNode visit(LogicalAggregate aggregate) { } /** - * Convert an input RelNode to an incremental RelNode. Populates snapshotRelNodes and deltaRelNodes. - * @param originalNode input RelNode to generate an incremental version for. + * Converts an input {@link RelNode} to its incremental version. This method traverses the input + * {@link RelNode} tree and transforms it by creating incremental version of the node. + * It also populates the {@code snapshotRelNodes} and {@code deltaRelNodes} collections, which + * are used to keep track of materialized views and incremental transformations, respectively. + * {@code snapshotRelNodes} and {@code deltaRelNodes} are used to generate the all incremental + * combinations of the input {@link RelNode}. + * + *

+ * The {@code snapshotRelNodes} collection is populated with the materialized versions of the + * nodes, which are intermediate snapshots of the original nodes. The {@code deltaRelNodes} + * collection is populated with the incremental versions of the nodes, which represent the + * deltas that need to be applied to the original nodes. + *

+ * + *

+ * The method specifically adds new elements to {@code snapshotRelNodes} and {@code deltaRelNodes} + * during the transformation of {@link LogicalProject} nodes: + *

+ *
    + *
  • {@code snapshotRelNodes}: This collection is updated with the original {@link LogicalProject} + * node or its materialized version, which is obtained from {@code getTempLastRelNode()} if available.
  • + *
  • {@code deltaRelNodes}: This collection is updated with the incremental version of the {@link LogicalProject} + * node, which is created by transforming the child node and preserving the project's expressions and row type.
  • + *
+ * + * @param originalNode the input {@link RelNode} to generate an incremental version for. + * @return the incremental version of the input {@link RelNode}. */ public RelNode convertRelIncremental(RelNode originalNode) { RelShuttle converter = new RelShuttleImpl() { From 74c8eb9e0530f221ae29c10f283577ab46a72396 Mon Sep 17 00:00:00 2001 From: yyy1000 Date: Tue, 6 Aug 2024 09:54:17 -0700 Subject: [PATCH 20/21] docs: add table name notation --- .../incremental/RelNodeGenerationTransformer.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java index 2ec76257e..bae645342 100644 --- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java +++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java @@ -84,7 +84,7 @@ public List> generateIncrementalRelNodes(RelNode relNode) { * That means each generated plan would be a combination of incremental plan and batch plan, consisting of * a List of RelNodes, denoting each sub-query will be incremental executed or batch executed. *

- * Take the following three-tables Join as an example: + * Take the following three-tables Join as an example (table names are enclosed in parentheses of the RelNodes): *

    *            LogicalProject#8
    *                  |
@@ -292,14 +292,14 @@ public RelNode visit(LogicalAggregate aggregate) {
    * - TableScan nodes are modified to point to a "_prev" version of the table.
    * - Other RelNodes are recursively transformed to operate on their "previous" versions of their inputs.
    * 

- * For example the following query for a two tables Join: + * For example the following query for a two tables Join (table names are enclosed in parentheses of the RelNodes): * Input: *

    *            LogicalProject
    *                  |
    *            LogicalJoin
    *             /        \
-   *      TableScan#A    TableScan#B
+   *   TableScan(Table_A)    TableScan(Table_B)
    * 
* * Output: @@ -308,7 +308,7 @@ public RelNode visit(LogicalAggregate aggregate) { * | * LogicalJoin * / \ - * TableScan#A_prev TableScan#B_prev + * TableScan(Table_A_prev) TableScan(Table_B_prev) *
* In SQL view, the transformation is: *

@@ -388,7 +388,7 @@ public RelNode visit(LogicalAggregate aggregate) { * if the join is in the needsProj set. (when the Join don't have a LogicalProject as its parent) * - For other type RelNode: recursively processing its inputs, and using the transformed children as its new inputs. *

- * Here is an example of how the uniformFormat method works for a three-tables join query + * Here is an example of how the uniformFormat method works for a three-tables join query (table names are enclosed in parentheses of the RelNodes): *

* Input: *


From a601a990f19f6de0963b523418c75fc148db4b8f Mon Sep 17 00:00:00 2001
From: yyy1000 
Date: Wed, 7 Aug 2024 11:56:11 -0700
Subject: [PATCH 21/21] feat: make batch execution stages

---
 .../RelNodeGenerationTransformer.java         |  3 +-
 .../incremental/RelNodeGenerationTest.java    | 44 +++++++++----------
 2 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
index bae645342..d97bff411 100644
--- a/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
+++ b/coral-incremental/src/main/java/com/linkedin/coral/incremental/RelNodeGenerationTransformer.java
@@ -70,7 +70,6 @@ public List> generateIncrementalRelNodes(RelNode relNode) {
     Map snapshotRelNodes = getSnapshotRelNodes();
     Map deltaRelNodes = getDeltaRelNodes();
     List> combinedLists = generateCombinedLists(deltaRelNodes, snapshotRelNodes);
-    combinedLists.add(Arrays.asList(relNode));
     return combinedLists;
   }
 
@@ -145,7 +144,7 @@ private List> generateCombinedLists(Map deltaRelN
     assert (deltaRelNodes.size() == snapshotRelNodes.size());
     int n = deltaRelNodes.size();
 
-    for (int i = 0; i < n; i++) {
+    for (int i = -1; i < n; i++) {
       List tempList = new ArrayList<>();
       for (int j = 0; j < n; j++) {
         if (j <= i) {
diff --git a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java
index 64e59b94f..42980c0a7 100644
--- a/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java
+++ b/coral-incremental/src/test/java/com/linkedin/coral/incremental/RelNodeGenerationTest.java
@@ -121,13 +121,13 @@ public void testNestedJoin() {
             + "INNER JOIN test.gamma_prev AS gamma_prev ON Table#0_delta.a2 = gamma_prev.g2) AS t\n" + "UNION ALL\n"
             + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta0\n"
             + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0";
-    List combined = Arrays.asList(Table0_delta,
-        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2");
+    String Table0 = "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1";
+    String Table1 =
+        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2";
+    List combined = Arrays.asList(Table0_delta, Table1);
     List incremental = Arrays.asList(Table0_delta, Table1_delta);
-    List batch = Arrays.asList("SELECT t.a2, gamma.g1\n" + "FROM (SELECT alpha.a1, alpha.a2\n"
-        + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1) AS t\n"
-        + "INNER JOIN test.gamma AS gamma ON t.a2 = gamma.g2");
-    List> expected = Arrays.asList(combined, incremental, batch);
+    List batch = Arrays.asList(Table0, Table1);
+    List> expected = Arrays.asList(batch, combined, incremental);
     checkAllPlans(sql, expected);
   }
 
@@ -148,13 +148,13 @@ public void testThreeTablesJoin() {
             + "INNER JOIN test.gamma_prev AS gamma_prev ON Table#0_delta.a2 = gamma_prev.g2) AS t\n" + "UNION ALL\n"
             + "SELECT *\n" + "FROM Table#0_delta AS Table#0_delta0\n"
             + "INNER JOIN test.gamma_delta AS gamma_delta0 ON Table#0_delta0.a2 = gamma_delta0.g2) AS t0";
-    List combined = Arrays.asList(Table0_delta,
-        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2");
+    String Table0 = "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1";
+    String Table1 =
+        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2";
+    List combined = Arrays.asList(Table0_delta, Table1);
     List incremental = Arrays.asList(Table0_delta, Table1_delta);
-    List batch = Arrays.asList("SELECT alpha.a1, alpha.a2, gamma.g1\n" + "FROM test.alpha AS alpha\n"
-        + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1\n"
-        + "INNER JOIN test.gamma AS gamma ON alpha.a2 = gamma.g2");
-    List> expected = Arrays.asList(combined, incremental, batch);
+    List batch = Arrays.asList(Table0, Table1);
+    List> expected = Arrays.asList(batch, combined, incremental);
     checkAllPlans(sql, expected);
   }
 
@@ -184,18 +184,16 @@ public void testFourTablesJoin() {
             + "INNER JOIN test.epsilon_prev AS epsilon_prev ON Table#1_delta.g1 = epsilon_prev.e1) AS t\n"
             + "UNION ALL\n" + "SELECT *\n" + "FROM Table#1_delta AS Table#1_delta0\n"
             + "INNER JOIN test.epsilon_delta AS epsilon_delta0 ON Table#1_delta0.g1 = epsilon_delta0.e1) AS t0";
-    List combined1 = Arrays.asList(Table0_delta,
-        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2",
-        "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1");
-    List combined2 = Arrays.asList(Table0_delta, Table1_delta,
-        "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1");
+    String Table0 = "SELECT *\n" + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1";
+    String Table1 =
+        "SELECT *\n" + "FROM Table#0 AS Table#0\n" + "INNER JOIN test.gamma AS gamma ON Table#0.a2 = gamma.g2";
+    String Table2 =
+        "SELECT *\n" + "FROM Table#1 AS Table#1\n" + "INNER JOIN test.epsilon AS epsilon ON Table#1.g1 = epsilon.e1";
+    List combined1 = Arrays.asList(Table0_delta, Table1, Table2);
+    List combined2 = Arrays.asList(Table0_delta, Table1_delta, Table2);
     List incremental = Arrays.asList(Table0_delta, Table1_delta, Table2_delta);
-    List batch = Arrays
-        .asList("SELECT t0.g1, epsilon.e2\n" + "FROM (SELECT t.a2, gamma.g1\n" + "FROM (SELECT alpha.a1, alpha.a2\n"
-            + "FROM test.alpha AS alpha\n" + "INNER JOIN test.beta AS beta ON alpha.a1 = beta.b1) AS t\n"
-            + "INNER JOIN test.gamma AS gamma ON t.a2 = gamma.g2) AS t0\n"
-            + "INNER JOIN test.epsilon AS epsilon ON t0.g1 = epsilon.e1");
-    List> expected = Arrays.asList(combined1, combined2, incremental, batch);
+    List batch = Arrays.asList(Table0, Table1, Table2);
+    List> expected = Arrays.asList(batch, combined1, combined2, incremental);
     checkAllPlans(sql, expected);
   }
 }