Skip to content

Commit e45a2f8

Browse files
gemelenLorenzo Martini
authored andcommitted
[SPARK-21708][BUILD] Migrate build to sbt 1.x
Migrate sbt-launcher URL to download one for sbt 1.x. Update plugins versions where required by sbt update. Change sbt version to be used to latest released at the moment, 1.3.13 Adjust build settings according to plugins and sbt changes. Migration to sbt 1.x: 1. enhances dev experience in development 2. updates build plugins to bring there new features/to fix bugs in them 3. enhances build performance on sbt side 4. eases movement to Scala 3 / dotty No. All existing tests passed, both on Jenkins and via Github Actions, also manually for Scala 2.13 profile. Closes apache#29286 from gemelen/feature/sbt-1.x. Authored-by: Denis Pyshev <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 919028c commit e45a2f8

File tree

11 files changed

+144
-93
lines changed

11 files changed

+144
-93
lines changed

.circleci/config.yml

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@ all-branches-and-tags: &all-branches-and-tags
2828
# Step templates
2929

3030
step_templates:
31-
restore-build-binaries-cache: &restore-build-binaries-cache
32-
restore_cache:
33-
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
3431
restore-ivy-cache: &restore-ivy-cache
3532
restore_cache:
3633
keys:
@@ -136,20 +133,11 @@ jobs:
136133
- maven-dependency-cache-{{ checksum "pom.xml" }}
137134
# Fallback - see https://circleci.com/docs/2.0/configuration-reference/#example-2
138135
- maven-dependency-cache-
139-
# Given the build-maven cache, this is superfluous, but leave it in in case we will want to remove the former
140-
- restore_cache:
141-
keys:
142-
- build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
143-
- build-binaries-
144136
- run:
145137
command: ./build/mvn -DskipTests -Psparkr -Phadoop-palantir install
146138
no_output_timeout: 20m
147139
# Get sbt to run trivially, ensures its launcher is downloaded under build/
148140
- run: ./build/sbt -h || true
149-
- save_cache:
150-
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
151-
paths:
152-
- ./build
153141
- save_cache:
154142
key: maven-dependency-cache-{{ checksum "pom.xml" }}
155143
paths:
@@ -165,7 +153,6 @@ jobs:
165153
# Failed to execute goal on project spark-assembly_2.11: Could not resolve dependencies for project org.apache.spark:spark-assembly_2.11:pom:2.4.0-SNAPSHOT
166154
- restore_cache:
167155
key: maven-dependency-cache-{{ checksum "pom.xml" }}
168-
- *restore-build-binaries-cache
169156
- run:
170157
name: Run style tests
171158
command: dev/run-style-tests.py
@@ -181,7 +168,6 @@ jobs:
181168
# key: build-maven-{{ .Branch }}-{{ .BuildNum }}
182169
- restore_cache:
183170
key: maven-dependency-cache-{{ checksum "pom.xml" }}
184-
- *restore-build-binaries-cache
185171
- run: |
186172
dev/run-build-tests.py | tee /tmp/run-build-tests.log
187173
- store_artifacts:
@@ -206,7 +192,6 @@ jobs:
206192
fi
207193
- *restore-ivy-cache
208194
- *restore-home-sbt-cache
209-
- *restore-build-binaries-cache
210195
- run:
211196
name: Download all external dependencies for the test configuration (which extends compile) and ensure we update first
212197
command: dev/sbt test:externalDependencyClasspath oldDeps/test:externalDependencyClasspath
@@ -251,7 +236,6 @@ jobs:
251236
- attach_workspace:
252237
at: .
253238
- *restore-ivy-cache
254-
- *restore-build-binaries-cache
255239
- *restore-home-sbt-cache
256240
- run: |
257241
dev/run-backcompat-tests.py | tee /tmp/run-backcompat-tests.log
@@ -305,7 +289,7 @@ jobs:
305289
run-scala-tests:
306290
<<: *test-defaults
307291
# project/CirclePlugin.scala does its own test splitting in SBT based on CIRCLE_NODE_INDEX, CIRCLE_NODE_TOTAL
308-
parallelism: 12
292+
parallelism: 8
309293
# Spark runs a lot of tests in parallel, we need 16 GB of RAM for this
310294
resource_class: xlarge
311295
steps:
@@ -320,7 +304,6 @@ jobs:
320304
- *link-in-build-sbt-cache
321305
# ---
322306
- *restore-ivy-cache
323-
- *restore-build-binaries-cache
324307
- *restore-home-sbt-cache
325308
- restore_cache:
326309
keys:
@@ -407,7 +390,6 @@ jobs:
407390
- *checkout-code
408391
- restore_cache:
409392
key: maven-dependency-cache-{{ checksum "pom.xml" }}
410-
- *restore-build-binaries-cache
411393
- run:
412394
command: dev/set_version_and_package.sh
413395
no_output_timeout: 15m

.sbtopts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
-J-Xmx4G
17+
-J-Xss4m

build/sbt-launch-lib.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dlog () {
3939

4040
acquire_sbt_jar () {
4141
SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
42-
URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
42+
URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar
4343
JAR=build/sbt-launch-${SBT_VERSION}.jar
4444

4545
sbt_jar=$JAR

dev/run-tests.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,8 @@ def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
390390
if checkstyle:
391391
run_java_style_checks(build_profiles)
392392

393-
build_spark_unidoc_sbt(extra_profiles)
393+
# TODO(lmartini): removed because broken, checks generated classes
394+
# build_spark_unidoc_sbt(extra_profiles)
394395

395396

396397
def build_apache_spark(build_tool, extra_profiles):

project/CirclePlugin.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ object CirclePlugin extends AutoPlugin {
288288
}
289289
},
290290

291-
test := (test, copyTestReportsToCircle) { (test, copy) =>
292-
test.doFinally(copy.map(_ => ()))
293-
}.value
291+
test := (test andFinally Def.taskDyn {
292+
copyTestReportsToCircle
293+
}).value
294294
))
295295
}

project/MimaBuild.scala

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ import com.typesafe.tools.mima.core._
2222
import com.typesafe.tools.mima.core.MissingClassProblem
2323
import com.typesafe.tools.mima.core.MissingTypesProblem
2424
import com.typesafe.tools.mima.core.ProblemFilters._
25-
import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts}
26-
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
27-
25+
import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaFailOnNoPrevious}
2826

2927
object MimaBuild {
3028

@@ -86,14 +84,17 @@ object MimaBuild {
8684
ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion)
8785
}
8886

89-
def mimaSettings(sparkHome: File, projectRef: ProjectRef) = {
87+
def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = {
9088
val organization = "org.apache.spark"
91-
val previousSparkVersion = "2.4.0"
89+
val previousSparkVersion = "3.0.0"
9290
val project = projectRef.project
9391
val fullId = "spark-" + project + "_2.12"
94-
mimaDefaultSettings ++
95-
Seq(mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
96-
mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value))
92+
93+
Seq(
94+
mimaFailOnNoPrevious := true,
95+
mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
96+
mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)
97+
)
9798
}
9899

99100
}

project/MimaExcludes.scala

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,44 @@ object MimaExcludes {
3636

3737
// Exclude rules for 3.0.x
3838
lazy val v30excludes = v24excludes ++ Seq(
39+
//[SPARK-21708][BUILD] Migrate build to sbt 1.x
40+
// mima plugin update caused new incompatibilities to be detected
41+
// core module
42+
// TODO(lmartini): this group was originally on top of 3.1 but applied on 3.0 because we picked the above commit
43+
// on top of 3.0
44+
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"),
45+
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
46+
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
47+
// mllib module
48+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"),
49+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"),
50+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"),
51+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"),
52+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"),
53+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"),
54+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"),
55+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
56+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
57+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"),
58+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"),
59+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"),
60+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"),
61+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"),
62+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
63+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
64+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"),
65+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.areaUnderROC"),
66+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"),
67+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"),
68+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"),
69+
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"),
70+
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.FMClassifier.trainImpl"),
71+
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.FMRegressor.trainImpl"),
72+
// TODO(lmartini): Additional excludes not in upstream but unique to palantir fork
73+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
74+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
75+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.broadcast.Broadcast.initializeForcefully"),
76+
3977
// [SPARK-23429][CORE] Add executor memory metrics to heartbeat and expose in executors REST API
4078
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.apply"),
4179
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.copy"),

0 commit comments

Comments
 (0)