
Commit 6165638

CartesianPartition format and cleanup
1 parent accc681 commit 6165638

3 files changed: +54 −40 lines changed

3 files changed

+54
-40
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
@@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
-- Add ZStd compression support for GTiff
+- Add ZStd compression support for GTiff [#3580](https://github.com/locationtech/geotrellis/pull/3580)
+- Do not depend on private Spark API, avoids sealing violation [#3586](https://github.com/locationtech/geotrellis/pull/3586)
 
 ## [3.8.0] - 2025-04-23
 

spark/src/main/scala/geotrellis/spark/join/CartesianPartition.scala

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+package geotrellis.spark.join
+
+import org.apache.spark.Partition
+import org.apache.spark.internal.Logging
+import org.apache.spark.rdd.RDD
+
+import java.io.{IOException, ObjectOutputStream}
+import scala.util.control.NonFatal
+
+// https://github.com/apache/spark/blob/686d84453610e463df7df95395ce6ed36a6efacd/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala#L29
+private[join] class CartesianPartition(
+  idx: Int,
+  @transient private val rdd1: RDD[_],
+  @transient private val rdd2: RDD[_],
+  s1Index: Int,
+  s2Index: Int
+) extends Partition {
+
+  var s1 = rdd1.partitions(s1Index)
+  var s2 = rdd2.partitions(s2Index)
+  override val index: Int = idx
+
+  @throws(classOf[IOException])
+  private def writeObject(oos: ObjectOutputStream): Unit = CartesianPartition.tryOrIOException {
+    // Update the reference to parent split at the time of task serialization
+    s1 = rdd1.partitions(s1Index)
+    s2 = rdd2.partitions(s2Index)
+    oos.defaultWriteObject()
+  }
+}
+
+object CartesianPartition extends Logging {
+  /**
+   * Execute a block of code that returns a value, re-throwing any non-fatal uncaught
+   * exceptions as IOException. This is used when implementing Externalizable and Serializable's
+   * read and write methods, since Java's serializer will not report non-IOExceptions properly;
+   * see SPARK-4080 for more context.
+   */
+  // https://github.com/apache/spark/blob/686d84453610e463df7df95395ce6ed36a6efacd/common/utils/src/main/scala/org/apache/spark/util/SparkErrorUtils.scala#L35
+  private def tryOrIOException[T](block: => T): T = {
+    try {
+      block
+    } catch {
+      case e: IOException =>
+        logError("Exception encountered", e)
+        throw e
+      case NonFatal(e) =>
+        logError("Exception encountered", e)
+        throw new IOException(e)
+    }
+  }
+}
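Note: Java serialization invokes a private writeObject(ObjectOutputStream) hook via reflection, which is how the override above gets a chance to refresh s1 and s2 at task-serialization time. A minimal sketch of that round trip, assuming plain JDK serialization and a hypothetical serialize helper (not code from this commit):

import java.io.{ByteArrayOutputStream, ObjectOutputStream}
import org.apache.spark.Partition

// Hypothetical helper: serializes a partition the way task serialization would,
// which triggers CartesianPartition.writeObject and re-resolves the parent
// splits s1 and s2 just before the bytes are written.
def serialize(partition: Partition): Array[Byte] = {
  val bytes = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(bytes)
  oos.writeObject(partition) // calls the private writeObject hook via reflection
  oos.close()
  bytes.toByteArray
}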

spark/src/main/scala/geotrellis/spark/join/FilteredCartesianRDD.scala

Lines changed: 0 additions & 39 deletions
@@ -23,47 +23,8 @@ package geotrellis.spark.join
 
 import org.apache.spark._
 import org.apache.spark.rdd.RDD
-import org.log4s.getLogger
 
-import java.io.{IOException, ObjectOutputStream}
 import scala.reflect.ClassTag
-import scala.util.control.NonFatal
-
-private class CartesianPartition(
-  idx: Int,
-  @transient private val rdd1: RDD[_],
-  @transient private val rdd2: RDD[_],
-  s1Index: Int,
-  s2Index: Int
-) extends Partition {
-
-  @transient private[this] lazy val logger = getLogger
-
-  var s1 = rdd1.partitions(s1Index)
-  var s2 = rdd2.partitions(s2Index)
-  override val index: Int = idx
-
-  private def tryOrIOException[T](block: => T): T = {
-    try {
-      block
-    } catch {
-      case e: IOException =>
-        logger.error(e)("Exception encountered")
-        throw e
-      case NonFatal(e) =>
-        logger.error(e)("Exception encountered")
-        throw new IOException(e)
-    }
-  }
-
-  @throws(classOf[IOException])
-  private def writeObject(oos: ObjectOutputStream): Unit = tryOrIOException {
-    // Update the reference to parent split at the time of task serialization
-    s1 = rdd1.partitions(s1Index)
-    s2 = rdd2.partitions(s2Index)
-    oos.defaultWriteObject()
-  }
-}
 
 /** Performs a cartesian join of two RDDs using filter and refine pattern.
  *

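For context on how the extracted class is consumed: a cartesian-style RDD builds one output partition per pair of parent partitions. The sketch below mirrors Spark's CartesianRDD.getPartitions and is an assumption about how FilteredCartesianRDD uses CartesianPartition, not code from this commit; rdd1 and rdd2 stand for the two parent RDDs of an RDD subclass in geotrellis.spark.join (where private[join] CartesianPartition is visible):

// Sketch of a getPartitions override inside an RDD subclass in the
// geotrellis.spark.join package.
override def getPartitions: Array[Partition] = {
  // One output partition per (left, right) parent-partition pair.
  val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length)
  for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) {
    val idx = s1.index * rdd2.partitions.length + s2.index
    array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index)
  }
  array
}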