
Commit 40ea038

GEOMESA-3536 Spark - use testcontainers for running Spark tests on a cluster (#3444)
1 parent be3971d commit 40ea038

File tree: 40 files changed, +2804 -3059 lines changed

.github/workflows/build-and-test.yml

Lines changed: 7 additions & 9 deletions
@@ -17,8 +17,9 @@ env:
   MAVEN_CLI_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false --batch-mode -Dlicense.skip=true
   MAVEN_COMPILE_ARGS: clean install -Dmaven.test.skip -Dmaven.assembly.skip=true -Dmaven.source.skip -Pskip-spark-runtimes -T4
   MAVEN_SPARK_ARGS: clean install -Dmaven.test.skip -Dmaven.assembly.skip=true -Dmaven.source.skip -T2
-  MAVEN_TEST_ARGS: test -Dtest.fork.count=1
-  MAVEN_IT_ARGS: test failsafe:integration-test failsafe:verify -DskipTests -Dtest.fork.count=1
+  MAVEN_TEST_ARGS: test
+  MAVEN_SPARK_TEST_ARGS: test -Dtest.fork.count=2
+  MAVEN_IT_ARGS: test failsafe:integration-test failsafe:verify -DskipTests
   MAVEN_ASSEMBLY_ARGS: assembly:single@make-assembly -Dassembly.ignoreMissingDescriptor=true -T2
   MAVEN_SCALADOC_ARGS: generate-sources scala:doc-jar
   MAVEN_JAVADOC_ARGS: generate-sources javadoc:jar -Psite
@@ -200,7 +201,6 @@
       - name: Integration Tests
         run: |
           export IT_MODULES="$(find . -path '*/src/test/*' -name '*IT.scala' -o -name '*IT.java' | sed 's|\(.*\)src/test.*|\1|' | sort -u | tr '\n' ',')"
-          echo $IT_MODULES
           mvn $MAVEN_IT_ARGS $MAVEN_CLI_OPTS -pl "$IT_MODULES"
       - name: Remove geomesa artifacts
         if: success() || failure()
@@ -237,13 +237,11 @@
       - name: Build Spark runtimes
        # build the spark runtime jars, they are slow so we do them here in a parallel job
         run: |
-          export PROJECT_LIST="$(find . -name '*-spark-runtime*' -type d -printf '%P,')"
-          echo $PROJECT_LIST
-          mvn $MAVEN_SPARK_ARGS $MAVEN_CLI_OPTS -pl "$PROJECT_LIST"
+          export SPARK_RUNTIME_MODULES="$(find . -name '*-spark-runtime*' -type d -printf '%P,')"
+          mvn $MAVEN_SPARK_ARGS $MAVEN_CLI_OPTS -pl "$SPARK_RUNTIME_MODULES"
       - name: Run Spark tests
-        run: |
-          echo mvn $MAVEN_TEST_ARGS $MAVEN_CLI_OPTS -f geomesa-spark
-          mvn $MAVEN_TEST_ARGS $MAVEN_CLI_OPTS -f geomesa-spark
+        if: matrix.scala-version == '2.12' # there's no 2.13 spark docker container
+        run: mvn $MAVEN_SPARK_TEST_ARGS $MAVEN_CLI_OPTS -f geomesa-spark
       - name: Remove geomesa artifacts
         if: success() || failure()
         run: rm -rf ~/.m2/repository/org/locationtech/geomesa
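The new test step is gated to Scala 2.12 because the testcontainers-based harness pulls a Spark docker image, and per the comment above no 2.13 image exists. A minimal sketch of the container pattern, assuming testcontainers is on the test classpath; the image tag, port, and class names here are illustrative, not the exact ones the GeoMesa harness uses:

import org.testcontainers.containers.GenericContainer
import org.testcontainers.utility.DockerImageName

// Illustrative wrapper: testcontainers' self-typed GenericContainer is
// easiest to use from Scala via a small subclass
class SparkContainer extends GenericContainer[SparkContainer](DockerImageName.parse("apache/spark:3.5.7")) {
  withExposedPorts(7077) // standalone master port (assumption)
}

object SparkClusterExample {
  def main(args: Array[String]): Unit = {
    val spark = new SparkContainer()
    spark.start() // pulls the image if needed and starts the container
    try {
      // tests would point spark.master at the containerized cluster
      val masterUrl = s"spark://${spark.getHost}:${spark.getMappedPort(7077)}"
      println(masterUrl)
    } finally {
      spark.stop()
    }
  }
}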

build/cqs.tsv

Lines changed: 3 additions & 3 deletions
@@ -317,9 +317,9 @@ org.apache.kafka:kafka-streams-scala_2.12 3.9.1 provided
 org.apache.kafka:kafka-streams 3.9.1 provided
 org.apache.sedona:sedona-spark-3.5_2.12 1.8.0 provided
 org.apache.sedona:sedona-spark-common-3.5_2.12 1.8.0 provided
-org.apache.spark:spark-catalyst_2.12 3.5.5 provided
-org.apache.spark:spark-core_2.12 3.5.5 provided
-org.apache.spark:spark-sql_2.12 3.5.5 provided
+org.apache.spark:spark-catalyst_2.12 3.5.7 provided
+org.apache.spark:spark-core_2.12 3.5.7 provided
+org.apache.spark:spark-sql_2.12 3.5.7 provided
 org.scala-lang:scala-compiler 2.12.20 provided
 org.slf4j:slf4j-reload4j 1.7.36 provided
 org.springframework.security:spring-security-core 5.8.15 provided

build/test/resources/log4j2-test.xml

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,8 @@
     <Logger name="org.apache.sedona.core.serde" level="warn"/>
     <Logger name="org.apache.sedona.viz.core.Serde" level="warn"/>
     <Logger name="org.apache.sedona.core.joinJudgement" level="warn"/>
+    <Logger name="org.apache.curator" level="warn"/>
+    <Logger name="org.apache.zookeeper" level="warn"/>
     <Root level="info">
       <AppenderRef ref="Console"/>
     </Root>

docs/user/upgrade.rst

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ The following dependencies have been upgraded:
 * kafka ``3.9.0`` -> ``3.9.1``
 * scala 2.12 ``2.12.19`` -> ``2.12.20``
 * scala 2.13 ``2.13.12`` -> ``2.13.16``
+* spark ``3.5.5`` -> ``3.5.7``
 
 Removed Modules
 ---------------

geomesa-accumulo/geomesa-accumulo-spark-runtime-accumulo20/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -211,6 +211,10 @@
                   <pattern>com.typesafe.scalalogging</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.com.typesafe.scalalogging</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.gson</pattern>
+                  <shadedPattern>org.locationtech.geomesa.shaded.com.google.gson</shadedPattern>
+                </relocation>
               </relocations>
             </configuration>
           </execution>
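The same Gson relocation is added to each of the shaded spark-runtime jars below (accumulo21, fs, gt, and hbase2), presumably to keep the bundled Gson from clashing with whatever the cluster ships. A quick sanity check of the shading, a sketch assuming the shaded runtime jar is on the classpath:

// Resolves only if the relocation was applied when the jar was shaded;
// throws ClassNotFoundException against an unshaded gson
Class.forName("org.locationtech.geomesa.shaded.com.google.gson.Gson")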

geomesa-accumulo/geomesa-accumulo-spark-runtime-accumulo21/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -208,6 +208,10 @@
                   <pattern>com.typesafe.scalalogging</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.com.typesafe.scalalogging</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.gson</pattern>
+                  <shadedPattern>org.locationtech.geomesa.shaded.com.google.gson</shadedPattern>
+                </relocation>
               </relocations>
             </configuration>
           </execution>

geomesa-fs/geomesa-fs-spark-runtime/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -135,6 +135,10 @@
                   <pattern>com.google.common</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.com.google.common</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.gson</pattern>
+                  <shadedPattern>org.locationtech.geomesa.shaded.com.google.gson</shadedPattern>
+                </relocation>
                 <relocation>
                   <pattern>org.apache.commons.codec</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.org.apache.commons.codec</shadedPattern>

geomesa-gt/geomesa-gt-spark-runtime/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -124,6 +124,10 @@
                   <pattern>com.google.common</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.com.google.common</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.gson</pattern>
+                  <shadedPattern>org.locationtech.geomesa.shaded.com.google.gson</shadedPattern>
+                </relocation>
                 <relocation>
                   <pattern>org.apache.commons.codec</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.org.apache.commons.codec</shadedPattern>

geomesa-gt/geomesa-gt-spark/src/main/scala/org/locationtech/geomesa/geotools/spark/GeoToolsSpatialRDDProvider.scala

Lines changed: 10 additions & 3 deletions
@@ -16,6 +16,7 @@ import org.geotools.api.data.{DataStore, DataStoreFinder, Query, Transaction}
 import org.geotools.api.feature.simple.SimpleFeature
 import org.locationtech.geomesa.spark.{SpatialRDD, SpatialRDDProvider}
 import org.locationtech.geomesa.utils.collection.CloseableIterator
+import org.locationtech.geomesa.utils.conf.GeoMesaSystemProperties.SystemProperty
 import org.locationtech.geomesa.utils.geotools.FeatureUtils
 import org.locationtech.geomesa.utils.geotools.converters.FastConverter
 import org.locationtech.geomesa.utils.io.{WithClose, WithStore}
@@ -58,9 +59,11 @@ class GeoToolsSpatialRDDProvider extends SpatialRDDProvider with LazyLogging {
    * @param typeName simple feature type name
    */
   override def save(rdd: RDD[SimpleFeature], params: Map[String, String], typeName: String): Unit = {
-    WithStore[DataStore](params) { ds =>
-      require(ds != null, "Could not load data store with the provided parameters")
-      require(ds.getSchema(typeName) != null, "Schema must exist before calling save - use `DataStore.createSchema`")
+    if (GeoToolsSpatialRDDProvider.StoreCheck.toBoolean.get) {
+      WithStore[DataStore](params) { ds =>
+        require(ds != null, "Could not load data store with the provided parameters")
+        require(ds.getSchema(typeName) != null, "Schema must exist before calling save - use `DataStore.createSchema`")
+      }
     }
 
     rdd.foreachPartition { iter =>
@@ -72,3 +75,7 @@ class GeoToolsSpatialRDDProvider extends SpatialRDDProvider with LazyLogging {
     }
   }
 }
+
+object GeoToolsSpatialRDDProvider {
+  val StoreCheck: SystemProperty = SystemProperty("geomesa.gt.spark.store.check", "true")
+}
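With the new StoreCheck property (default true), the driver-side store and schema validation in save can now be skipped. A usage sketch based on the signature above; the wrapper method and type name are illustrative:

import org.apache.spark.rdd.RDD
import org.geotools.api.feature.simple.SimpleFeature
import org.locationtech.geomesa.geotools.spark.GeoToolsSpatialRDDProvider

// Sketch: disable the driver-side check, e.g. for stores reachable from
// the executors but not from the driver
def saveWithoutCheck(rdd: RDD[SimpleFeature], params: Map[String, String]): Unit = {
  System.setProperty("geomesa.gt.spark.store.check", "false")
  new GeoToolsSpatialRDDProvider().save(rdd, params, "my-type") // type name is illustrative
}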

geomesa-hbase/geomesa-hbase-spark-runtime-hbase2/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -177,6 +177,10 @@
                   <pattern>com.google.common</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.com.google.common</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>com.google.gson</pattern>
+                  <shadedPattern>org.locationtech.geomesa.shaded.com.google.gson</shadedPattern>
+                </relocation>
                 <relocation>
                   <pattern>org.apache.commons.codec</pattern>
                   <shadedPattern>org.locationtech.geomesa.shaded.org.apache.commons.codec</shadedPattern>
