Commit 8c5f0b3

version bump to 0.3.2

1 parent c387915 · commit 8c5f0b3

File tree: 6 files changed, +20 −33 lines

README.md

Lines changed: 4 additions & 4 deletions

````diff
@@ -58,7 +58,7 @@ See [Our Features](https://dataflint.gitbook.io/dataflint-for-spark/overview/our
 
 Install DataFlint via sbt:
 ```sbt
-libraryDependencies += "io.dataflint" %% "spark" % "0.3.1"
+libraryDependencies += "io.dataflint" %% "spark" % "0.3.2"
 ```
 
 Then instruct spark to load the DataFlint plugin:
@@ -76,7 +76,7 @@ Add these 2 configs to your pyspark session builder:
 ```python
 builder = pyspark.sql.SparkSession.builder
 ...
-    .config("spark.jars.packages", "io.dataflint:spark_2.12:0.3.1") \
+    .config("spark.jars.packages", "io.dataflint:spark_2.12:0.3.2") \
     .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin") \
 ...
 ```
@@ -87,14 +87,14 @@ Alternatively, install DataFlint with **no code change** as a spark ivy package
 
 ```bash
 spark-submit
---packages io.dataflint:spark_2.12:0.3.1 \
+--packages io.dataflint:spark_2.12:0.3.2 \
 --conf spark.plugins=io.dataflint.spark.SparkDataflintPlugin \
 ...
 ```
 
 ### Additional installation options
 
-* There is also support for scala 2.13, if your spark cluster is using scala 2.13 change package name to io.dataflint:spark_**2.13**:0.3.1
+* There is also support for scala 2.13, if your spark cluster is using scala 2.13 change package name to io.dataflint:spark_**2.13**:0.3.2
 * For more installation options, including for **python** and **k8s spark-operator**, see [Install on Spark docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-spark)
 * For installing DataFlint in **spark history server** for observability on completed runs see [install on spark history server docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-spark-history-server)
 * For installing DataFlint on **DataBricks** see [install on databricks docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-databricks)
````

spark-plugin/build.sbt

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 import xerial.sbt.Sonatype._
 
-lazy val versionNum: String = "0.3.1"
+lazy val versionNum: String = "0.3.2"
 lazy val scala212 = "2.12.18"
 lazy val scala213 = "2.13.12"
 lazy val supportedScalaVersions = List(scala212, scala213)
```

spark-ui/package-lock.json

Lines changed: 2 additions & 2 deletions
Generated file; diff not rendered.

spark-ui/package.json

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 {
   "name": "dataflint-ui",
-  "version": "0.3.1",
+  "version": "0.3.2",
   "homepage": "./",
   "private": true,
   "dependencies": {
```

spark-ui/src/components/SqlFlow/StageNode.tsx

Lines changed: 0 additions & 17 deletions

```diff
@@ -541,23 +541,6 @@ export const StageNode: FC<{
     }
   }
 
-  if (data.node.nodeName === "Exchange") {
-    const partitionsMetric = parseFloat(
-      data.node.metrics
-        .find((metric) => metric.name === "partitions")
-        ?.value?.replaceAll(",", "") ?? "0"
-    );
-    const shuffleWriteMetric = getSizeFromMetrics(data.node.metrics)
-
-    if (partitionsMetric && shuffleWriteMetric) {
-      const avgPartitionSize = shuffleWriteMetric / partitionsMetric;
-      const avgPartitionSizeString = humanFileSize(avgPartitionSize);
-      dataTable.push({
-        name: "Average Write Partition Size",
-        value: avgPartitionSizeString
-      });
-    }
-  }
 
   if (data.node.nodeName === "AQEShuffleRead") {
     const partitionsMetric = parseFloat(
```
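
For reference, the deleted "Exchange" branch computed the value it displayed as total shuffle write bytes divided by the partition count. A minimal TypeScript sketch of that arithmetic (not the repo's code; `humanFileSize` below is a simplified stand-in for the helper imported from `utils/FormatUtils`):

```typescript
// Simplified stand-in for the humanFileSize helper from utils/FormatUtils.
function humanFileSize(bytes: number): string {
  const units = ["B", "KiB", "MiB", "GiB", "TiB"];
  let i = 0;
  while (bytes >= 1024 && i < units.length - 1) {
    bytes /= 1024;
    i++;
  }
  return `${bytes.toFixed(1)} ${units[i]}`;
}

// Average write partition size = total shuffle write bytes / partition count.
function avgWritePartitionSize(
  shuffleWriteBytes: number,
  partitions: number
): string | undefined {
  // Mirrors the truthiness guard in the removed code: skip when either
  // metric is missing or zero.
  if (!partitions || !shuffleWriteBytes) return undefined;
  return humanFileSize(shuffleWriteBytes / partitions);
}

// Example: 64 GiB written across 200 partitions ≈ 327.7 MiB per partition.
console.log(avgWritePartitionSize(64 * 1024 ** 3, 200));
```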

spark-ui/src/reducers/Alerts/MaxPartitionToBigAlert.ts

Lines changed: 12 additions & 8 deletions

```diff
@@ -5,8 +5,8 @@ import {
 } from "../../interfaces/AppStore";
 import { humanFileSize } from "../../utils/FormatUtils";
 
-// 20GB threshold in bytes
-const MAX_PARTITION_SIZE_THRESHOLD = 1;
+// 5GB threshold in bytes
+const MAX_PARTITION_SIZE_THRESHOLD = 5 * 1024 * 1024 * 1024;
 
 export function reduceMaxPartitionToBigAlert(
   sql: SparkSQLStore,
@@ -53,13 +53,8 @@
   let maxPartitionSize = 0;
   let dataType = "";
 
-  if (stageData.shuffleWriteDistribution && stageData.shuffleWriteDistribution.length > 10 && stageData.shuffleWriteDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
-    // The last element (index 10) is the maximum value
-    maxPartitionSize = stageData.shuffleWriteDistribution[10];
-    dataType = "shuffle write";
-  }
   // If no shuffle write, check output distribution
-  else if (stageData.outputDistribution && stageData.outputDistribution.length > 10 && stageData.outputDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+  if (stageData.outputDistribution && stageData.outputDistribution.length > 10 && stageData.outputDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
     maxPartitionSize = stageData.outputDistribution[10];
     dataType = "output";
   }
@@ -68,6 +63,15 @@
     maxPartitionSize = stageData.inputDistribution[10];
     dataType = "input";
   }
+  else if (stageData.shuffleWriteDistribution && stageData.shuffleWriteDistribution.length > 10 && stageData.shuffleWriteDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+    // The last element (index 10) is the maximum value
+    maxPartitionSize = stageData.shuffleWriteDistribution[10];
+    dataType = "shuffle write";
+  } else if (stageData.shuffleReadDistribution && stageData.shuffleReadDistribution.length > 10 && stageData.shuffleReadDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+    // The last element (index 10) is the maximum value
+    maxPartitionSize = stageData.shuffleReadDistribution[10];
+    dataType = "shuffle read";
+  }
 
   // If the maximum partition size exceeds our threshold, add an alert
   if (maxPartitionSize !== 0) {
```
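
Two things worth noting in this diff: the old constant was `1` byte even though its comment claimed 20GB, so the comparison triggered for any partition larger than one byte, while the new constant actually encodes 5 GiB (5 × 1024³ = 5,368,709,120 bytes). The rewritten branches also check the distributions in a new priority order: output, then input, then shuffle write, then shuffle read. A hedged TypeScript sketch of that logic follows; `StageLike` and `findOversizedPartition` are illustrative names, not the repo's real `AppStore` interfaces, and the shape assumption comes from the diff's own comments (each distribution is an 11-element array whose last entry, index 10, is the maximum):

```typescript
const MAX_PARTITION_SIZE_THRESHOLD = 5 * 1024 * 1024 * 1024; // 5 GiB = 5,368,709,120 bytes

// Hypothetical stand-in for the stage-data shape used by the alert reducer.
interface StageLike {
  outputDistribution?: number[];
  inputDistribution?: number[];
  shuffleWriteDistribution?: number[];
  shuffleReadDistribution?: number[];
}

function findOversizedPartition(
  stage: StageLike
): { dataType: string; maxPartitionSize: number } | undefined {
  // Same priority order as the rewritten branches: output, input,
  // shuffle write, shuffle read; the first oversized distribution wins.
  const checks: [string, number[] | undefined][] = [
    ["output", stage.outputDistribution],
    ["input", stage.inputDistribution],
    ["shuffle write", stage.shuffleWriteDistribution],
    ["shuffle read", stage.shuffleReadDistribution],
  ];
  for (const [dataType, dist] of checks) {
    // Index 10 (the last element) holds the maximum partition size.
    if (dist && dist.length > 10 && dist[10] > MAX_PARTITION_SIZE_THRESHOLD) {
      return { dataType, maxPartitionSize: dist[10] };
    }
  }
  return undefined; // no alert
}
```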
