Skip to content

Commit 1d0e82f

Browse files
mihailoale-db authored and MaxGekk committed
[SPARK-52019][SQL] Strip outer reference before creating a name in toPrettySQL
### What changes were proposed in this pull request?
In the following query, the `min(t2a)` expression used to be named `min(outer(t2.t2a))`:
```
SELECT t1a FROM t1 WHERE t1a IN (SELECT t2a FROM t2 WHERE EXISTS (SELECT min(t2a) FROM t3))
```
This is a compatibility problem between the single-pass resolver and the fixed-point analyzer: the single-pass resolver generates names after it finishes resolving the aggregate expression `min(t2a)` (in a bottom-up manner), and at that point the `OuterReference` is wrapped around the whole aggregate expression, so the name looks like `outer(min(t2a))`. I propose that we fix this in the fixed-point analyzer so that the name can be computed more easily in single-pass.

### Why are the changes needed?
To ease development of the single-pass analyzer.

### Does this PR introduce _any_ user-facing change?
The `Explain extended` output of affected plans will be different.

### How was this patch tested?
Existing tests (regenerated golden files).

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #50804 from mihailoale-db/toprettysqlapichange.

Authored-by: mihailoale-db <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
1 parent d80e857 commit 1d0e82f

File tree

4 files changed

+50
-27
lines changed

4 files changed

+50
-27
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/LiteralFunctionResolution.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ object LiteralFunctionResolution {
4141
// support CURRENT_DATE, CURRENT_TIMESTAMP, CURRENT_TIME,
4242
// CURRENT_USER, USER, SESSION_USER and grouping__id
4343
private val literalFunctions: Seq[(String, () => Expression, Expression => String)] = Seq(
44-
(CurrentDate().prettyName, () => CurrentDate(), toPrettySQL(_)),
45-
(CurrentTimestamp().prettyName, () => CurrentTimestamp(), toPrettySQL(_)),
46-
(CurrentTime().prettyName, () => CurrentTime(), toPrettySQL(_)),
47-
(CurrentUser().prettyName, () => CurrentUser(), toPrettySQL),
48-
("user", () => CurrentUser(), toPrettySQL),
49-
("session_user", () => CurrentUser(), toPrettySQL),
44+
(CurrentDate().prettyName, () => CurrentDate(), e => toPrettySQL(e)),
45+
(CurrentTimestamp().prettyName, () => CurrentTimestamp(), e => toPrettySQL(e)),
46+
(CurrentTime().prettyName, () => CurrentTime(), e => toPrettySQL(e)),
47+
(CurrentUser().prettyName, () => CurrentUser(), e => toPrettySQL(e)),
48+
("user", () => CurrentUser(), e => toPrettySQL(e)),
49+
("session_user", () => CurrentUser(), e => toPrettySQL(e)),
5050
(VirtualColumn.hiveGroupingIdName, () => GroupingID(Nil), _ => VirtualColumn.hiveGroupingIdName)
5151
)
5252
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import com.google.common.io.ByteStreams
2525

2626
import org.apache.spark.internal.Logging
2727
import org.apache.spark.sql.catalyst.expressions._
28+
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
2829
import org.apache.spark.sql.connector.catalog.MetadataColumn
2930
import org.apache.spark.sql.types.{MetadataBuilder, NumericType, StringType, StructType}
3031
import org.apache.spark.unsafe.types.UTF8String
@@ -91,24 +92,45 @@ package object util extends Logging {
9192

9293
def stackTraceToString(t: Throwable): String = SparkErrorUtils.stackTraceToString(t)
9394

94-
// Replaces attributes, string literals, complex type extractors with their pretty form so that
95-
// generated column names don't contain back-ticks or double-quotes.
96-
def usePrettyExpression(e: Expression): Expression = e transform {
97-
case a: Attribute => new PrettyAttribute(a)
98-
case Literal(s: UTF8String, StringType) => PrettyAttribute(s.toString, StringType)
99-
case Literal(v, t: NumericType) if v != null => PrettyAttribute(v.toString, t)
100-
case Literal(null, dataType) => PrettyAttribute("NULL", dataType)
101-
case e: GetStructField =>
102-
val name = e.name.getOrElse(e.childSchema(e.ordinal).name)
103-
PrettyAttribute(usePrettyExpression(e.child).sql + "." + name, e.dataType)
104-
case e: GetArrayStructFields =>
105-
PrettyAttribute(s"${usePrettyExpression(e.child)}.${e.field.name}", e.dataType)
106-
case r: InheritAnalysisRules =>
107-
PrettyAttribute(r.makeSQLString(r.parameters.map(toPrettySQL)), r.dataType)
108-
case c: Cast if c.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty =>
109-
PrettyAttribute(usePrettyExpression(c.child).sql, c.dataType)
110-
case p: PythonFuncExpression => PrettyPythonUDF(p.name, p.dataType, p.children)
111-
}
95+
/**
 * Replaces attributes, string literals, complex type extractors, casts and python functions with
 * their pretty form so that generated column names don't contain back-ticks or double-quotes.
 *
 * In case the provided expression is an [[AggregateExpression]] that contains an
 * [[OuterReference]], pull out the outer reference and compute the name from the stripped
 * aggregate (e.g. the name becomes `outer(min(t2a))` instead of `min(outer(t2.t2a))`) to
 * maintain compatibility with the single-pass analyzer.
 *
 * @param e the expression to prettify
 * @param stripOuterReference whether outer references should be pulled out of aggregate
 *                            expressions before naming them. The flag is forwarded to every
 *                            nested prettification, so passing `false` disables the stripping
 *                            for the whole tree and not only for the top-level traversal.
 * @return the transformed expression with the matched nodes replaced by their pretty forms
 */
private def usePrettyExpression(e: Expression, stripOuterReference: Boolean = true): Expression =
  e transform {
    case aggregateExpression: AggregateExpression
        if stripOuterReference && SubExprUtils.containsOuter(aggregateExpression) =>
      // Name the aggregate as if it had no outer references, then wrap the resulting pretty
      // attribute in a single OuterReference.
      val strippedAggregateExpression = SubExprUtils.stripOuterReference(aggregateExpression)
      OuterReference(
        new PrettyAttribute(
          Alias(
            strippedAggregateExpression,
            toPrettySQL(strippedAggregateExpression, stripOuterReference)
          )().toAttribute
        )
      )
    case a: Attribute => new PrettyAttribute(a)
    case Literal(s: UTF8String, StringType) => PrettyAttribute(s.toString, StringType)
    case Literal(v, t: NumericType) if v != null => PrettyAttribute(v.toString, t)
    case Literal(null, dataType) => PrettyAttribute("NULL", dataType)
    case structField: GetStructField =>
      // Fall back to the schema's field name when the extractor carries no explicit name.
      val name = structField.name.getOrElse(structField.childSchema(structField.ordinal).name)
      PrettyAttribute(
        usePrettyExpression(structField.child, stripOuterReference).sql + "." + name,
        structField.dataType
      )
    case structFields: GetArrayStructFields =>
      // NOTE(review): this interpolates the prettified child's `toString`, whereas the
      // GetStructField case above uses `.sql`; kept as-is so generated names do not change.
      val prettyChild = usePrettyExpression(structFields.child, stripOuterReference)
      PrettyAttribute(s"${prettyChild}.${structFields.field.name}", structFields.dataType)
    case r: InheritAnalysisRules =>
      PrettyAttribute(
        r.makeSQLString(
          r.parameters.map(parameter => toPrettySQL(parameter, stripOuterReference))
        ),
        r.dataType
      )
    // Casts without the USER_SPECIFIED_CAST tag are hidden from the generated name.
    case c: Cast if c.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty =>
      PrettyAttribute(usePrettyExpression(c.child, stripOuterReference).sql, c.dataType)
    case p: PythonFuncExpression => PrettyPythonUDF(p.name, p.dataType, p.children)
  }
112134

113135
def quoteIdentifier(name: String): String = {
114136
QuotingUtils.quoteIdentifier(name)
@@ -122,7 +144,8 @@ package object util extends Logging {
122144
QuotingUtils.quoteIfNeeded(part)
123145
}
124146

125-
def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql
147+
/**
 * Renders the given expression as a SQL string after rewriting it with
 * `usePrettyExpression`.
 *
 * @param e the expression to render
 * @param stripOuterReference passed through unchanged to `usePrettyExpression`
 * @return the `sql` string of the prettified expression
 */
def toPrettySQL(e: Expression, stripOuterReference: Boolean = true): String = {
  val prettyExpression = usePrettyExpression(e, stripOuterReference)
  prettyExpression.sql
}
126149

127150
def escapeSingleQuotedString(str: String): String = {
128151
QuotingUtils.escapeSingleQuotedString(str)

sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
159159
"errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
160160
"sqlState" : "0A000",
161161
"messageParameters" : {
162-
"sqlExprs" : "\"min(t2a) AS `min(outer(t2.t2a))`\""
162+
"sqlExprs" : "\"min(t2a) AS `outer(min(t2a))`\""
163163
},
164164
"queryContext" : [ {
165165
"objectType" : "",

sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
160160
"errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
161161
"sqlState" : "0A000",
162162
"messageParameters" : {
163-
"sqlExprs" : "\"min(t2a) AS `min(outer(t2.t2a))`\""
163+
"sqlExprs" : "\"min(t2a) AS `outer(min(t2a))`\""
164164
},
165165
"queryContext" : [ {
166166
"objectType" : "",

0 commit comments

Comments
 (0)