diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c52a1d0f56e9f..7b0f3e37f9649 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -611,7 +611,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = { groupByExprs.map { case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier) - case other => Alias(other, other.toString)() + case other => Alias(other, toPrettySQL(other))() } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 486de0d1015a6..d075e5f50e507 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -112,7 +112,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { assert(gExpressions.size == 3) val firstGroupingExprAttrName = gExpressions(0).asInstanceOf[AttributeReference].name.replaceAll("#[0-9]*", "#0") - assert(firstGroupingExprAttrName == "(a#0 * 2)") + assert(firstGroupingExprAttrName == "(a * 2)") assert(gExpressions(1).asInstanceOf[AttributeReference].name == "b") assert(gExpressions(2).asInstanceOf[AttributeReference].name == VirtualColumn.groupingIdName) } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out index 8b2e55f126287..fff84a1a253b4 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out @@ -15,9 +15,9 @@ AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true -- !query SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE -- !query analysis -Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] -+- Expand [[a#x, b#x, (a#x + b#x)#x, b#x, 0], [a#x, b#x, (a#x + b#x)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x] +Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] ++- Expand [[a#x, b#x, (a + b)#x, b#x, 0], [a#x, b#x, (a + b)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, (a + b)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -43,9 +43,9 @@ Aggregate [a#x, b#x, spark_grouping_id#xL], [a#x, b#x, sum(b#x) AS sum(b)#xL] -- !query SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP -- !query analysis -Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] -+- Expand [[a#x, b#x, (a#x + b#x)#x, b#x, 0], [a#x, b#x, (a#x + b#x)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x] +Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] ++- Expand [[a#x, b#x, (a + b)#x, b#x, 0], [a#x, b#x, (a + b)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a + 
b)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -531,9 +531,9 @@ Aggregate [k#x, b#x, spark_grouping_id#xL], [k#x, b#x, sum((a#x - b#x)) AS sum(( -- !query SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) -- !query analysis -Aggregate [(a#x + b#x)#x, k#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL] -+- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a#x + b#x)#x, k#x, spark_grouping_id#xL] - +- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS k#x] +Aggregate [(a + b)#x, k#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL] ++- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a + b)#x, k#x, spark_grouping_id#xL] + +- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS k#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out index b73ee16c8bdef..254f9d0785408 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out @@ -58,6 +58,36 @@ Aggregate [a#x, b#x, c#x, spark_grouping_id#xL], [a#x, b#x, c#x, count(d#x) AS c +- LocalRelation [a#x, b#x, c#x, d#x] +-- !query +SELECT a, b, c, d FROM grouping GROUP BY GROUPING SETS (a, b, c, d) +-- !query analysis +Aggregate [a#x, b#x, c#x, d#x, spark_grouping_id#xL], [a#x, b#x, c#x, d#x] ++- Expand [[a#x, b#x, c#x, d#x, a#x, null, null, null, 7], [a#x, b#x, c#x, d#x, null, b#x, null, null, 11], [a#x, b#x, c#x, d#x, null, null, c#x, null, 13], [a#x, b#x, c#x, d#x, null, null, null, d#x, 14]], 
[a#x, b#x, c#x, d#x, a#x, b#x, c#x, d#x, spark_grouping_id#xL] + +- Project [a#x, b#x, c#x, d#x, a#x AS a#x, b#x AS b#x, c#x AS c#x, d#x AS d#x] + +- SubqueryAlias grouping + +- View (`grouping`, [a#x, b#x, c#x, d#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x, cast(c#x as string) AS c#x, cast(d#x as int) AS d#x] + +- Project [a#x, b#x, c#x, d#x] + +- SubqueryAlias grouping + +- LocalRelation [a#x, b#x, c#x, d#x] + + +-- !query +SELECT a, b FROM grouping GROUP BY GROUPING SETS (a, b, d + 1) ORDER BY `(d + 1)` +-- !query analysis +Project [a#x, b#x] ++- Sort [(d + 1)#x ASC NULLS FIRST], true + +- Aggregate [a#x, b#x, (d + 1)#x, spark_grouping_id#xL], [a#x, b#x, (d + 1)#x] + +- Expand [[a#x, b#x, c#x, d#x, a#x, null, null, 3], [a#x, b#x, c#x, d#x, null, b#x, null, 5], [a#x, b#x, c#x, d#x, null, null, (d + 1)#x, 6]], [a#x, b#x, c#x, d#x, a#x, b#x, (d + 1)#x, spark_grouping_id#xL] + +- Project [a#x, b#x, c#x, d#x, a#x AS a#x, b#x AS b#x, (d#x + 1) AS (d + 1)#x] + +- SubqueryAlias grouping + +- View (`grouping`, [a#x, b#x, c#x, d#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x, cast(c#x as string) AS c#x, cast(d#x as int) AS d#x] + +- Project [a#x, b#x, c#x, d#x] + +- SubqueryAlias grouping + +- LocalRelation [a#x, b#x, c#x, d#x] + + -- !query SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) -- !query analysis @@ -120,9 +150,9 @@ Aggregate [c1#x, c2#x, spark_grouping_id#xL], [-c1#x AS c1#x] -- !query SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b)) -- !query analysis -Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL] -+- Expand [[a#x, b#x, c#x, (a#x + b#x)#x, null, 1], [a#x, b#x, c#x, null, b#x, 2]], [a#x, b#x, c#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, c#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x] +Aggregate [(a + b)#x, 
b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL] ++- Expand [[a#x, b#x, c#x, (a + b)#x, null, 1], [a#x, b#x, c#x, null, b#x, 2]], [a#x, b#x, c#x, (a + b)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, c#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x] +- SubqueryAlias t +- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x] +- LocalRelation [col1#x, col2#x, col3#x] @@ -131,9 +161,9 @@ Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)# -- !query SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b)) -- !query analysis -Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [(a#x + b#x)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL] -+- Expand [[a#x, b#x, c#x, (a#x + b#x)#x, null, 1, 0], [a#x, b#x, c#x, (a#x + b#x)#x, null, 1, 1], [a#x, b#x, c#x, null, b#x, 2, 2]], [a#x, b#x, c#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x] - +- Project [a#x, b#x, c#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x] +Aggregate [(a + b)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [(a + b)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL] ++- Expand [[a#x, b#x, c#x, (a + b)#x, null, 1, 0], [a#x, b#x, c#x, (a + b)#x, null, 1, 1], [a#x, b#x, c#x, null, b#x, 2, 2]], [a#x, b#x, c#x, (a + b)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x] + +- Project [a#x, b#x, c#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x] +- SubqueryAlias t +- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x] +- LocalRelation [col1#x, col2#x, col3#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out index 27e9707425833..82b03e986f9dd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out @@ 
-299,9 +299,9 @@ select (x+y)*1, sum(z) from (select 1 as x, 2 as y, 3 as z) s group by grouping sets (x+y, x) -- !query analysis -Aggregate [(x#x + y#x)#x, x#x, spark_grouping_id#xL], [((x#x + y#x)#x * 1) AS ((x + y) * 1)#x, sum(z#x) AS sum(z)#xL] -+- Expand [[x#x, y#x, z#x, (x#x + y#x)#x, null, 1], [x#x, y#x, z#x, null, x#x, 2]], [x#x, y#x, z#x, (x#x + y#x)#x, x#x, spark_grouping_id#xL] - +- Project [x#x, y#x, z#x, (x#x + y#x) AS (x#x + y#x)#x, x#x AS x#x] +Aggregate [(x + y)#x, x#x, spark_grouping_id#xL], [((x + y)#x * 1) AS ((x + y) * 1)#x, sum(z#x) AS sum(z)#xL] ++- Expand [[x#x, y#x, z#x, (x + y)#x, null, 1], [x#x, y#x, z#x, null, x#x, 2]], [x#x, y#x, z#x, (x + y)#x, x#x, spark_grouping_id#xL] + +- Project [x#x, y#x, z#x, (x#x + y#x) AS (x + y)#x, x#x AS x#x] +- SubqueryAlias s +- Project [1 AS x#x, 2 AS y#x, 3 AS z#x] +- OneRowRelation @@ -593,9 +593,9 @@ select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,4,7 /* 3,6 */ -- !query analysis Sort [grouping(a)#x ASC NULLS FIRST, grouping(b)#x ASC NULLS FIRST, max(v)#x ASC NULLS FIRST], true -+- Aggregate [a#x, b#x, (a#x + 1)#x, (b#x + 1)#x, (a#x + 2)#x, (b#x + 2)#x, spark_grouping_id#xL], [a#x, b#x, cast((shiftright(spark_grouping_id#xL, 5) & 1) as tinyint) AS grouping(a)#x, cast((shiftright(spark_grouping_id#xL, 4) & 1) as tinyint) AS grouping(b)#x, sum(v#x) AS sum(v)#xL, count(1) AS count(1)#xL, max(v#x) AS max(v)#x] - +- Expand [[a#x, b#x, v#x, a#x, b#x, null, null, null, null, 15], [a#x, b#x, v#x, null, null, (a#x + 1)#x, (b#x + 1)#x, null, null, 51], [a#x, b#x, v#x, null, null, null, null, (a#x + 2)#x, (b#x + 2)#x, 60]], [a#x, b#x, v#x, a#x, b#x, (a#x + 1)#x, (b#x + 1)#x, (a#x + 2)#x, (b#x + 2)#x, spark_grouping_id#xL] - +- Project [a#x, b#x, v#x, a#x AS a#x, b#x AS b#x, (a#x + 1) AS (a#x + 1)#x, (b#x + 1) AS (b#x + 1)#x, (a#x + 2) AS (a#x + 2)#x, (b#x + 2) AS (b#x + 2)#x] ++- Aggregate [a#x, b#x, (a + 1)#x, (b + 1)#x, (a 
+ 2)#x, (b + 2)#x, spark_grouping_id#xL], [a#x, b#x, cast((shiftright(spark_grouping_id#xL, 5) & 1) as tinyint) AS grouping(a)#x, cast((shiftright(spark_grouping_id#xL, 4) & 1) as tinyint) AS grouping(b)#x, sum(v#x) AS sum(v)#xL, count(1) AS count(1)#xL, max(v#x) AS max(v)#x] + +- Expand [[a#x, b#x, v#x, a#x, b#x, null, null, null, null, 15], [a#x, b#x, v#x, null, null, (a + 1)#x, (b + 1)#x, null, null, 51], [a#x, b#x, v#x, null, null, null, null, (a + 2)#x, (b + 2)#x, 60]], [a#x, b#x, v#x, a#x, b#x, (a + 1)#x, (b + 1)#x, (a + 2)#x, (b + 2)#x, spark_grouping_id#xL] + +- Project [a#x, b#x, v#x, a#x AS a#x, b#x AS b#x, (a#x + 1) AS (a + 1)#x, (b#x + 1) AS (b + 1)#x, (a#x + 2) AS (a + 2)#x, (b#x + 2) AS (b + 2)#x] +- SubqueryAlias gstest1 +- View (`gstest1`, [a#x, b#x, v#x]) +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS v#x] @@ -634,9 +634,9 @@ select v||'a', case grouping(v||'a') when 1 then 1 else 0 end, count(*) group by rollup(i, v||'a') order by 1,3 -- !query analysis Sort [concat(v, a)#x ASC NULLS FIRST, count(1)#xL ASC NULLS FIRST], true -+- Aggregate [i#x, concat(v#x, a)#x, spark_grouping_id#xL], [concat(v#x, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL] - +- Expand [[i#x, v#x, i#x, concat(v#x, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v#x, a)#x, spark_grouping_id#xL] - +- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v#x, a)#x] ++- Aggregate [i#x, concat(v, a)#x, spark_grouping_id#xL], [concat(v, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL] + +- Expand [[i#x, v#x, i#x, concat(v, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, 
v#x, null, null, 3]], [i#x, v#x, i#x, concat(v, a)#x, spark_grouping_id#xL] + +- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v, a)#x] +- SubqueryAlias u +- LocalRelation [i#x, v#x] @@ -647,9 +647,9 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*) group by rollup(i, v||'a') order by 1,3 -- !query analysis Sort [concat(v, a)#x ASC NULLS FIRST, count(1)#xL ASC NULLS FIRST], true -+- Aggregate [i#x, concat(v#x, a)#x, spark_grouping_id#xL], [concat(v#x, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL] - +- Expand [[i#x, v#x, i#x, concat(v#x, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v#x, a)#x, spark_grouping_id#xL] - +- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v#x, a)#x] ++- Aggregate [i#x, concat(v, a)#x, spark_grouping_id#xL], [concat(v, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL] + +- Expand [[i#x, v#x, i#x, concat(v, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v, a)#x, spark_grouping_id#xL] + +- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v, a)#x] +- SubqueryAlias u +- LocalRelation [i#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out index fbee3e2c8c89f..b246c4530cbb1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out @@ -15,9 +15,9 @@ AS testData(a, b), false, true, LocalTempView, 
UNSUPPORTED, true -- !query SELECT udf(a + b), b, udf(SUM(a - b)) FROM testData GROUP BY udf(a + b), b WITH CUBE -- !query analysis -Aggregate [cast(udf(cast((a#x + b#x) as string)) as int)#x, b#x, spark_grouping_id#xL], [cast(udf(cast((a#x + b#x) as string)) as int)#x AS udf((a + b))#x, b#x, cast(udf(cast(sum((a#x - b#x)) as string)) as bigint) AS udf(sum((a - b)))#xL] -+- Expand [[a#x, b#x, cast(udf(cast((a#x + b#x) as string)) as int)#x, b#x, 0], [a#x, b#x, cast(udf(cast((a#x + b#x) as string)) as int)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, cast(udf(cast((a#x + b#x) as string)) as int)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, cast(udf(cast((a#x + b#x) as string)) as int) AS cast(udf(cast((a#x + b#x) as string)) as int)#x, b#x AS b#x] +Aggregate [udf((a + b))#x, b#x, spark_grouping_id#xL], [udf((a + b))#x AS udf((a + b))#x, b#x, cast(udf(cast(sum((a#x - b#x)) as string)) as bigint) AS udf(sum((a - b)))#xL] ++- Expand [[a#x, b#x, udf((a + b))#x, b#x, 0], [a#x, b#x, udf((a + b))#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, udf((a + b))#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, cast(udf(cast((a#x + b#x) as string)) as int) AS udf((a + b))#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -29,9 +29,9 @@ Aggregate [cast(udf(cast((a#x + b#x) as string)) as int)#x, b#x, spark_grouping_ -- !query SELECT udf(a), udf(b), SUM(b) FROM testData GROUP BY udf(a), b WITH CUBE -- !query analysis -Aggregate [cast(udf(cast(a#x as string)) as int)#x, b#x, spark_grouping_id#xL], [cast(udf(cast(a#x as string)) as int)#x AS udf(a)#x, cast(udf(cast(b#x as string)) as int) AS udf(b)#x, sum(b#x) AS sum(b)#xL] -+- Expand [[a#x, b#x, cast(udf(cast(a#x as string)) as int)#x, b#x, 0], [a#x, b#x, cast(udf(cast(a#x as string)) as int)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, 
cast(udf(cast(a#x as string)) as int)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, cast(udf(cast(a#x as string)) as int) AS cast(udf(cast(a#x as string)) as int)#x, b#x AS b#x] +Aggregate [udf(a)#x, b#x, spark_grouping_id#xL], [udf(a)#x AS udf(a)#x, cast(udf(cast(b#x as string)) as int) AS udf(b)#x, sum(b#x) AS sum(b)#xL] ++- Expand [[a#x, b#x, udf(a)#x, b#x, 0], [a#x, b#x, udf(a)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, udf(a)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, cast(udf(cast(a#x as string)) as int) AS udf(a)#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -43,9 +43,9 @@ Aggregate [cast(udf(cast(a#x as string)) as int)#x, b#x, spark_grouping_id#xL], -- !query SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP -- !query analysis -Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [cast(udf(cast((a#x + b#x)#x as string)) as int) AS udf((a + b))#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] -+- Expand [[a#x, b#x, (a#x + b#x)#x, b#x, 0], [a#x, b#x, (a#x + b#x)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x] +Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [cast(udf(cast((a + b)#x as string)) as int) AS udf((a + b))#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL] ++- Expand [[a#x, b#x, (a + b)#x, b#x, 0], [a#x, b#x, (a + b)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a + b)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -57,9 +57,9 @@ Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [cast(udf(cast((a#x + b#x) -- !query SELECT udf(a), b, udf(SUM(b)) FROM testData GROUP BY udf(a), b WITH ROLLUP -- 
!query analysis -Aggregate [cast(udf(cast(a#x as string)) as int)#x, b#x, spark_grouping_id#xL], [cast(udf(cast(a#x as string)) as int)#x AS udf(a)#x, b#x, cast(udf(cast(sum(b#x) as string)) as bigint) AS udf(sum(b))#xL] -+- Expand [[a#x, b#x, cast(udf(cast(a#x as string)) as int)#x, b#x, 0], [a#x, b#x, cast(udf(cast(a#x as string)) as int)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, cast(udf(cast(a#x as string)) as int)#x, b#x, spark_grouping_id#xL] - +- Project [a#x, b#x, cast(udf(cast(a#x as string)) as int) AS cast(udf(cast(a#x as string)) as int)#x, b#x AS b#x] +Aggregate [udf(a)#x, b#x, spark_grouping_id#xL], [udf(a)#x AS udf(a)#x, b#x, cast(udf(cast(sum(b#x) as string)) as bigint) AS udf(sum(b))#xL] ++- Expand [[a#x, b#x, udf(a)#x, b#x, 0], [a#x, b#x, udf(a)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, udf(a)#x, b#x, spark_grouping_id#xL] + +- Project [a#x, b#x, cast(udf(cast(a#x as string)) as int) AS udf(a)#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] @@ -404,9 +404,9 @@ Aggregate [k#x, b#x, spark_grouping_id#xL], [k#x, b#x, sum((a#x - b#x)) AS sum(( -- !query SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) -- !query analysis -Aggregate [(a#x + b#x)#x, k#x, spark_grouping_id#xL], [cast(udf(cast((a#x + b#x)#x as string)) as int) AS udf((a + b))#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL] -+- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a#x + b#x)#x, k#x, spark_grouping_id#xL] - +- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, cast(udf(cast(cast(udf(cast(b#x as string)) as int) as string)) as int) AS k#x] +Aggregate [(a + b)#x, k#x, spark_grouping_id#xL], [cast(udf(cast((a + b)#x as string)) as int) AS udf((a + b))#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL] ++- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a + b)#x, k#x, spark_grouping_id#xL] + +- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, 
cast(udf(cast(cast(udf(cast(b#x as string)) as int) as string)) as int) AS k#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql index 909c36c926c17..013a5419f8d58 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql @@ -13,6 +13,10 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)); -- SPARK-17849: grouping set throws NPE #3 SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)); +-- SPARK-52007: grouping set doesn't produce expression IDs in grouping expressions +SELECT a, b, c, d FROM grouping GROUP BY GROUPING SETS (a, b, c, d); +SELECT a, b FROM grouping GROUP BY GROUPING SETS (a, b, d + 1) ORDER BY `(d + 1)`; + -- Group sets without explicit group by SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1); diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 5849acd094fbd..21e70c63535cb 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -39,6 +39,37 @@ NULL NULL 6 1 NULL NULL 9 1 +-- !query +SELECT a, b, c, d FROM grouping GROUP BY GROUPING SETS (a, b, c, d) +-- !query schema +struct<a:string,b:string,c:string,d:int> +-- !query output +1 NULL NULL NULL +4 NULL NULL NULL +7 NULL NULL NULL +NULL 2 NULL NULL +NULL 5 NULL NULL +NULL 8 NULL NULL +NULL NULL 3 NULL +NULL NULL 6 NULL +NULL NULL 9 NULL +NULL NULL NULL 1 + + +-- !query +SELECT a, b FROM grouping GROUP BY GROUPING SETS (a, b, d + 1) ORDER BY `(d + 1)` +-- !query schema +struct<a:string,b:string> +-- !query output +NULL 2 +1 NULL +NULL 5 +NULL 8 +4 NULL +7 NULL 
+NULL NULL + + -- !query SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) -- !query schema