@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan,
2626import org .apache .spark .sql .catalyst .plans .physical ._
2727import org .apache .spark .sql .execution .adaptive .AdaptiveSparkPlanHelper
2828import org .apache .spark .sql .execution .columnar .{InMemoryRelation , InMemoryTableScanExec }
29- import org .apache .spark .sql .execution .exchange .{EnsureRequirements , ReusedExchangeExec , ReuseExchange , ShuffleExchangeExec }
29+ import org .apache .spark .sql .execution .exchange .{EnsureRequirements , PruneShuffleAndSort , ReusedExchangeExec , ReuseExchange , ShuffleExchangeExec }
3030import org .apache .spark .sql .execution .joins .{BroadcastHashJoinExec , SortMergeJoinExec }
3131import org .apache .spark .sql .functions ._
3232import org .apache .spark .sql .internal .SQLConf
@@ -482,7 +482,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper {
482482 val inputPlan = ShuffleExchangeExec (
483483 partitioning,
484484 DummySparkPlan (outputPartitioning = partitioning))
485- val outputPlan = EnsureRequirements (spark.sessionState.conf ).apply(inputPlan)
485+ val outputPlan = PruneShuffleAndSort ( ).apply(inputPlan)
486486 assertDistributionRequirementsAreSatisfied(outputPlan)
487487 if (outputPlan.collect { case e : ShuffleExchangeExec => true }.size == 1 ) {
488488 fail(s " Topmost Exchange should not have been eliminated: \n $outputPlan" )
@@ -775,6 +775,42 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper {
775775 }
776776 }
777777
test("SPARK-28148: repartition after join is not optimized away") {
  // Checks how many SortExec / ShuffleExchangeExec nodes remain in the executed plan when a
  // join on `id` is followed by a repartition + per-partition sort, or by a global sort,
  // on that same column.
  //
  // NOTE(review): the expected counts presumably assume both joins are planned as sort-merge
  // joins (one shuffle + one sort per join side); confirm that the session's broadcast
  // threshold cannot turn them into broadcast joins for these input sizes.
  val df1 = spark.range(0, 5000000, 1, 5)
  val df2 = spark.range(0, 10000000, 1, 5)

  // non global sort order and partitioning should be reusable after left join
  val leftJoinPlan = df1.join(df2, Seq("id"), "left")
    .repartition(df1("id"))
    .sortWithinPartitions(df1("id"))
    .queryExecution.executedPlan

  // non global sort order and partitioning should be reusable after inner join
  val innerJoinPlan = df1.join(df2, Seq("id"))
    .repartition(df1("id"))
    .sortWithinPartitions(df1("id"))
    .queryExecution.executedPlan

  // global sort should not be removed
  val globalSortPlan = df1.join(df2, Seq("id"))
    .orderBy(df1("id"))
    .queryExecution.executedPlan

  // (scenario label, executed plan, expected number of sorts == expected number of shuffles)
  Seq(
    ("left join + repartition + sortWithinPartitions", leftJoinPlan, 2),
    ("inner join + repartition + sortWithinPartitions", innerJoinPlan, 2),
    ("inner join + orderBy", globalSortPlan, 3)
  ).foreach { case (scenario, plan, expected) =>
    val sortCount = plan.collect { case s: SortExec => s }.length
    val shuffleCount = plan.collect { case s: ShuffleExchangeExec => s }.length
    // Include the plan in the clue so a failure shows which operators were (not) pruned.
    assert(sortCount == expected,
      s"$scenario: expected $expected SortExec node(s) but found $sortCount in\n$plan")
    assert(shuffleCount == expected,
      s"$scenario: expected $expected ShuffleExchangeExec node(s) but found $shuffleCount in\n$plan")
  }
}
813+
778814 test(" SPARK-24500: create union with stream of children" ) {
779815 val df = Union (Stream (
780816 Range (1 , 1 , 1 , 1 ),
0 commit comments