Skip to content

[SPARK-52007] [SQL] Expression IDs shouldn't be present in grouping expressions when using grouping sets #50791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
/**
 * Wraps each GROUP BY expression in an [[Alias]].
 *
 * @param groupByExprs the grouping expressions to alias
 * @return one [[Alias]] per input expression, in the same order
 */
private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = {
  groupByExprs.map {
    // A named expression already carries a usable name; keep it together with its qualifier.
    case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier)
    // Any other expression is named via toPrettySQL instead of toString, so the alias reads
    // "(a + b)" rather than "(a#x + b#x)", i.e. without expression IDs. The former
    // `other.toString` arm was a catch-all placed first, which made this arm unreachable;
    // it has been removed.
    case other => Alias(other, toPrettySQL(other))()
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest {
assert(gExpressions.size == 3)
val firstGroupingExprAttrName =
gExpressions(0).asInstanceOf[AttributeReference].name.replaceAll("#[0-9]*", "#0")
assert(firstGroupingExprAttrName == "(a#0 * 2)")
assert(firstGroupingExprAttrName == "(a * 2)")
assert(gExpressions(1).asInstanceOf[AttributeReference].name == "b")
assert(gExpressions(2).asInstanceOf[AttributeReference].name == VirtualColumn.groupingIdName)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true
-- !query
SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE
-- !query analysis
Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, (a#x + b#x)#x, b#x, 0], [a#x, b#x, (a#x + b#x)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x]
Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, (a + b)#x, b#x, 0], [a#x, b#x, (a + b)#x, null, 1], [a#x, b#x, null, b#x, 2], [a#x, b#x, null, null, 3]], [a#x, b#x, (a + b)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
Expand All @@ -43,9 +43,9 @@ Aggregate [a#x, b#x, spark_grouping_id#xL], [a#x, b#x, sum(b#x) AS sum(b)#xL]
-- !query
SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
-- !query analysis
Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, (a#x + b#x)#x, b#x, 0], [a#x, b#x, (a#x + b#x)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x]
Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, (a + b)#x, b#x, 0], [a#x, b#x, (a + b)#x, null, 1], [a#x, b#x, null, null, 3]], [a#x, b#x, (a + b)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
Expand Down Expand Up @@ -531,9 +531,9 @@ Aggregate [k#x, b#x, spark_grouping_id#xL], [k#x, b#x, sum((a#x - b#x)) AS sum((
-- !query
SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
-- !query analysis
Aggregate [(a#x + b#x)#x, k#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a#x + b#x)#x, k#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS k#x]
Aggregate [(a + b)#x, k#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, k#x, sum((a#x - b#x)) AS sum((a - b))#xL]
+- Expand [[a#x, b#x, null, k#x, 2]], [a#x, b#x, (a + b)#x, k#x, spark_grouping_id#xL]
+- Project [a#x, b#x, (a#x + b#x) AS (a + b)#x, b#x AS k#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ Aggregate [a#x, b#x, c#x, spark_grouping_id#xL], [a#x, b#x, c#x, count(d#x) AS c
+- LocalRelation [a#x, b#x, c#x, d#x]


-- !query
SELECT a, b, c, d FROM grouping GROUP BY GROUPING SETS (a, b, c, d)
-- !query analysis
Aggregate [a#x, b#x, c#x, d#x, spark_grouping_id#xL], [a#x, b#x, c#x, d#x]
+- Expand [[a#x, b#x, c#x, d#x, a#x, null, null, null, 7], [a#x, b#x, c#x, d#x, null, b#x, null, null, 11], [a#x, b#x, c#x, d#x, null, null, c#x, null, 13], [a#x, b#x, c#x, d#x, null, null, null, d#x, 14]], [a#x, b#x, c#x, d#x, a#x, b#x, c#x, d#x, spark_grouping_id#xL]
+- Project [a#x, b#x, c#x, d#x, a#x AS a#x, b#x AS b#x, c#x AS c#x, d#x AS d#x]
+- SubqueryAlias grouping
+- View (`grouping`, [a#x, b#x, c#x, d#x])
+- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x, cast(c#x as string) AS c#x, cast(d#x as int) AS d#x]
+- Project [a#x, b#x, c#x, d#x]
+- SubqueryAlias grouping
+- LocalRelation [a#x, b#x, c#x, d#x]


-- !query
SELECT a, b FROM grouping GROUP BY GROUPING SETS (a, b, d + 1) ORDER BY `(d + 1)`
-- !query analysis
Project [a#x, b#x]
+- Sort [(d + 1)#x ASC NULLS FIRST], true
+- Aggregate [a#x, b#x, (d + 1)#x, spark_grouping_id#xL], [a#x, b#x, (d + 1)#x]
+- Expand [[a#x, b#x, c#x, d#x, a#x, null, null, 3], [a#x, b#x, c#x, d#x, null, b#x, null, 5], [a#x, b#x, c#x, d#x, null, null, (d + 1)#x, 6]], [a#x, b#x, c#x, d#x, a#x, b#x, (d + 1)#x, spark_grouping_id#xL]
+- Project [a#x, b#x, c#x, d#x, a#x AS a#x, b#x AS b#x, (d#x + 1) AS (d + 1)#x]
+- SubqueryAlias grouping
+- View (`grouping`, [a#x, b#x, c#x, d#x])
+- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x, cast(c#x as string) AS c#x, cast(d#x as int) AS d#x]
+- Project [a#x, b#x, c#x, d#x]
+- SubqueryAlias grouping
+- LocalRelation [a#x, b#x, c#x, d#x]


-- !query
SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1)
-- !query analysis
Expand Down Expand Up @@ -120,9 +150,9 @@ Aggregate [c1#x, c2#x, spark_grouping_id#xL], [-c1#x AS c1#x]
-- !query
SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b))
-- !query analysis
Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL]
+- Expand [[a#x, b#x, c#x, (a#x + b#x)#x, null, 1], [a#x, b#x, c#x, null, b#x, 2]], [a#x, b#x, c#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, c#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x]
Aggregate [(a + b)#x, b#x, spark_grouping_id#xL], [(a + b)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL]
+- Expand [[a#x, b#x, c#x, (a + b)#x, null, 1], [a#x, b#x, c#x, null, b#x, 2]], [a#x, b#x, c#x, (a + b)#x, b#x, spark_grouping_id#xL]
+- Project [a#x, b#x, c#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x]
+- SubqueryAlias t
+- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x]
+- LocalRelation [col1#x, col2#x, col3#x]
Expand All @@ -131,9 +161,9 @@ Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL], [(a#x + b#x)#x AS (a + b)#
-- !query
SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b))
-- !query analysis
Aggregate [(a#x + b#x)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [(a#x + b#x)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL]
+- Expand [[a#x, b#x, c#x, (a#x + b#x)#x, null, 1, 0], [a#x, b#x, c#x, (a#x + b#x)#x, null, 1, 1], [a#x, b#x, c#x, null, b#x, 2, 2]], [a#x, b#x, c#x, (a#x + b#x)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x]
+- Project [a#x, b#x, c#x, (a#x + b#x) AS (a#x + b#x)#x, b#x AS b#x]
Aggregate [(a + b)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [(a + b)#x AS (a + b)#x, b#x, sum(c#x) AS sum(c)#xL]
+- Expand [[a#x, b#x, c#x, (a + b)#x, null, 1, 0], [a#x, b#x, c#x, (a + b)#x, null, 1, 1], [a#x, b#x, c#x, null, b#x, 2, 2]], [a#x, b#x, c#x, (a + b)#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x]
+- Project [a#x, b#x, c#x, (a#x + b#x) AS (a + b)#x, b#x AS b#x]
+- SubqueryAlias t
+- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x]
+- LocalRelation [col1#x, col2#x, col3#x]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,9 @@ select (x+y)*1, sum(z)
from (select 1 as x, 2 as y, 3 as z) s
group by grouping sets (x+y, x)
-- !query analysis
Aggregate [(x#x + y#x)#x, x#x, spark_grouping_id#xL], [((x#x + y#x)#x * 1) AS ((x + y) * 1)#x, sum(z#x) AS sum(z)#xL]
+- Expand [[x#x, y#x, z#x, (x#x + y#x)#x, null, 1], [x#x, y#x, z#x, null, x#x, 2]], [x#x, y#x, z#x, (x#x + y#x)#x, x#x, spark_grouping_id#xL]
+- Project [x#x, y#x, z#x, (x#x + y#x) AS (x#x + y#x)#x, x#x AS x#x]
Aggregate [(x + y)#x, x#x, spark_grouping_id#xL], [((x + y)#x * 1) AS ((x + y) * 1)#x, sum(z#x) AS sum(z)#xL]
+- Expand [[x#x, y#x, z#x, (x + y)#x, null, 1], [x#x, y#x, z#x, null, x#x, 2]], [x#x, y#x, z#x, (x + y)#x, x#x, spark_grouping_id#xL]
+- Project [x#x, y#x, z#x, (x#x + y#x) AS (x + y)#x, x#x AS x#x]
+- SubqueryAlias s
+- Project [1 AS x#x, 2 AS y#x, 3 AS z#x]
+- OneRowRelation
Expand Down Expand Up @@ -593,9 +593,9 @@ select a, b, grouping(a), grouping(b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,4,7 /* 3,6 */
-- !query analysis
Sort [grouping(a)#x ASC NULLS FIRST, grouping(b)#x ASC NULLS FIRST, max(v)#x ASC NULLS FIRST], true
+- Aggregate [a#x, b#x, (a#x + 1)#x, (b#x + 1)#x, (a#x + 2)#x, (b#x + 2)#x, spark_grouping_id#xL], [a#x, b#x, cast((shiftright(spark_grouping_id#xL, 5) & 1) as tinyint) AS grouping(a)#x, cast((shiftright(spark_grouping_id#xL, 4) & 1) as tinyint) AS grouping(b)#x, sum(v#x) AS sum(v)#xL, count(1) AS count(1)#xL, max(v#x) AS max(v)#x]
+- Expand [[a#x, b#x, v#x, a#x, b#x, null, null, null, null, 15], [a#x, b#x, v#x, null, null, (a#x + 1)#x, (b#x + 1)#x, null, null, 51], [a#x, b#x, v#x, null, null, null, null, (a#x + 2)#x, (b#x + 2)#x, 60]], [a#x, b#x, v#x, a#x, b#x, (a#x + 1)#x, (b#x + 1)#x, (a#x + 2)#x, (b#x + 2)#x, spark_grouping_id#xL]
+- Project [a#x, b#x, v#x, a#x AS a#x, b#x AS b#x, (a#x + 1) AS (a#x + 1)#x, (b#x + 1) AS (b#x + 1)#x, (a#x + 2) AS (a#x + 2)#x, (b#x + 2) AS (b#x + 2)#x]
+- Aggregate [a#x, b#x, (a + 1)#x, (b + 1)#x, (a + 2)#x, (b + 2)#x, spark_grouping_id#xL], [a#x, b#x, cast((shiftright(spark_grouping_id#xL, 5) & 1) as tinyint) AS grouping(a)#x, cast((shiftright(spark_grouping_id#xL, 4) & 1) as tinyint) AS grouping(b)#x, sum(v#x) AS sum(v)#xL, count(1) AS count(1)#xL, max(v#x) AS max(v)#x]
+- Expand [[a#x, b#x, v#x, a#x, b#x, null, null, null, null, 15], [a#x, b#x, v#x, null, null, (a + 1)#x, (b + 1)#x, null, null, 51], [a#x, b#x, v#x, null, null, null, null, (a + 2)#x, (b + 2)#x, 60]], [a#x, b#x, v#x, a#x, b#x, (a + 1)#x, (b + 1)#x, (a + 2)#x, (b + 2)#x, spark_grouping_id#xL]
+- Project [a#x, b#x, v#x, a#x AS a#x, b#x AS b#x, (a#x + 1) AS (a + 1)#x, (b#x + 1) AS (b + 1)#x, (a#x + 2) AS (a + 2)#x, (b#x + 2) AS (b + 2)#x]
+- SubqueryAlias gstest1
+- View (`gstest1`, [a#x, b#x, v#x])
+- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS v#x]
Expand Down Expand Up @@ -634,9 +634,9 @@ select v||'a', case grouping(v||'a') when 1 then 1 else 0 end, count(*)
group by rollup(i, v||'a') order by 1,3
-- !query analysis
Sort [concat(v, a)#x ASC NULLS FIRST, count(1)#xL ASC NULLS FIRST], true
+- Aggregate [i#x, concat(v#x, a)#x, spark_grouping_id#xL], [concat(v#x, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL]
+- Expand [[i#x, v#x, i#x, concat(v#x, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v#x, a)#x, spark_grouping_id#xL]
+- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v#x, a)#x]
+- Aggregate [i#x, concat(v, a)#x, spark_grouping_id#xL], [concat(v, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL]
+- Expand [[i#x, v#x, i#x, concat(v, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v, a)#x, spark_grouping_id#xL]
+- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v, a)#x]
+- SubqueryAlias u
+- LocalRelation [i#x, v#x]

Expand All @@ -647,9 +647,9 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
group by rollup(i, v||'a') order by 1,3
-- !query analysis
Sort [concat(v, a)#x ASC NULLS FIRST, count(1)#xL ASC NULLS FIRST], true
+- Aggregate [i#x, concat(v#x, a)#x, spark_grouping_id#xL], [concat(v#x, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL]
+- Expand [[i#x, v#x, i#x, concat(v#x, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v#x, a)#x, spark_grouping_id#xL]
+- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v#x, a)#x]
+- Aggregate [i#x, concat(v, a)#x, spark_grouping_id#xL], [concat(v, a)#x AS concat(v, a)#x, CASE WHEN (cast(cast((shiftright(spark_grouping_id#xL, 0) & 1) as tinyint) as int) = 1) THEN 1 ELSE 0 END AS CASE WHEN (grouping(concat(v, a)) = 1) THEN 1 ELSE 0 END#x, count(1) AS count(1)#xL]
+- Expand [[i#x, v#x, i#x, concat(v, a)#x, 0], [i#x, v#x, i#x, null, 1], [i#x, v#x, null, null, 3]], [i#x, v#x, i#x, concat(v, a)#x, spark_grouping_id#xL]
+- Project [i#x, v#x, i#x AS i#x, concat(v#x, a) AS concat(v, a)#x]
+- SubqueryAlias u
+- LocalRelation [i#x, v#x]

Expand Down
Loading