Skip to content

Commit 4b5fc1d

Browse files
committed
[SPARK-34667][SQL] Support casting of year-month intervals to strings
### What changes were proposed in this pull request? 1. Added a new method `toYearMonthIntervalString()` to `IntervalUtils`, which converts a year-month interval, given as a number of months, to a string in the form **"INTERVAL '[sign]yearField-monthField' YEAR TO MONTH"**. 2. Extended the `Cast` expression to support casting of `YearMonthIntervalType` to `StringType`. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? It should not, because the new year-month interval type has not been released yet. ### How was this patch tested? Added new tests for casting: ``` $ build/sbt "testOnly *CastSuite*" ``` Closes #32056 from MaxGekk/cast-ym-interval-to-string. Authored-by: Max Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent 19c7d2f commit 4b5fc1d

File tree

3 files changed

+46
-4
lines changed

3 files changed

+46
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
406406
case pudt: PythonUserDefinedType => castToString(pudt.sqlType)
407407
case udt: UserDefinedType[_] =>
408408
buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString))
409+
case YearMonthIntervalType =>
410+
buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i)))
409411
case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
410412
}
411413

@@ -1121,6 +1123,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
11211123
(c, evPrim, evNull) => {
11221124
code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
11231125
}
1126+
case YearMonthIntervalType =>
1127+
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
1128+
(c, evPrim, _) =>
1129+
code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));"""
11241130
case _ =>
11251131
(c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
11261132
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,4 +834,21 @@ object IntervalUtils {
834834
* @return The period of months, not null
835835
*/
836836
def monthsToPeriod(months: Int): Period = Period.ofMonths(months).normalized()
837+
838+
/**
 * Renders a year-month interval, given as a total number of months, as a string of the
 * form `INTERVAL '[sign]years-months' YEAR TO MONTH`, following the ANSI SQL standard.
 *
 * @param months The number of months, positive or negative
 * @return Year-month interval string
 */
def toYearMonthIntervalString(months: Int): String = {
  // Widen to Long before taking the magnitude so that Int.MinValue does not overflow.
  val magnitude: Long = math.abs(months.toLong)
  val prefix = if (months < 0) "-" else ""
  val years = magnitude / MONTHS_PER_YEAR
  val remainingMonths = magnitude % MONTHS_PER_YEAR
  s"INTERVAL '$prefix$years-$remainingMonths' YEAR TO MONTH"
}
837854
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.sql.{Date, Timestamp}
21-
import java.time.DateTimeException
21+
import java.time.{DateTimeException, Period}
2222
import java.util.{Calendar, TimeZone}
2323

2424
import scala.collection.parallel.immutable.ParVector
@@ -64,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
6464
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
6565
(atomicTypes -- Set(
6666
// TODO(SPARK-34668): Support casting of day-time intervals to strings
67-
DayTimeIntervalType,
68-
// TODO(SPARK-34667): Support casting of year-month intervals to strings
69-
YearMonthIntervalType)).foreach(dt => checkNullCast(dt, StringType))
67+
DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType))
7068
checkNullCast(StringType, BinaryType)
7169
checkNullCast(StringType, BooleanType)
7270
numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -799,6 +797,27 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
799797
}
800798
}
801799
}
800+
801+
test("SPARK-34667: cast year-month interval to string") {
  // Each entry pairs an input period with the payload expected between the quotes
  // of the resulting interval string.
  val expectedPayloads = Seq(
    Period.ofMonths(0) -> "0-0",
    Period.ofMonths(1) -> "0-1",
    Period.ofMonths(-1) -> "-0-1",
    Period.ofYears(1) -> "1-0",
    Period.ofYears(-1) -> "-1-0",
    Period.ofYears(10).plusMonths(10) -> "10-10",
    Period.ofYears(-123).minusMonths(6) -> "-123-6",
    Period.ofMonths(Int.MaxValue) -> "178956970-7",
    Period.ofMonths(Int.MinValue) -> "-178956970-8"
  )

  for ((period, intervalPayload) <- expectedPayloads) {
    val expected = s"INTERVAL '$intervalPayload' YEAR TO MONTH"
    checkEvaluation(Cast(Literal(period), StringType), expected)
  }

  // Also run the interpreted-vs-codegen consistency check for this cast.
  checkConsistencyBetweenInterpretedAndCodegen(
    (child: Expression) => Cast(child, StringType), YearMonthIntervalType)
}
802821
}
803822

804823
abstract class AnsiCastSuiteBase extends CastSuiteBase {

0 commit comments

Comments
 (0)