Commit 8c5f0b3

version bump to 0.3.2

1 parent c387915 · commit 8c5f0b3

File tree: 6 files changed, +20 −33 lines

README.md

Lines changed: 4 additions & 4 deletions

````diff
@@ -58,7 +58,7 @@ See [Our Features](https://dataflint.gitbook.io/dataflint-for-spark/overview/our
 
 Install DataFlint via sbt:
 ```sbt
-libraryDependencies += "io.dataflint" %% "spark" % "0.3.1"
+libraryDependencies += "io.dataflint" %% "spark" % "0.3.2"
 ```
 
 Then instruct spark to load the DataFlint plugin:
@@ -76,7 +76,7 @@ Add these 2 configs to your pyspark session builder:
 ```python
 builder = pyspark.sql.SparkSession.builder
 ...
-    .config("spark.jars.packages", "io.dataflint:spark_2.12:0.3.1") \
+    .config("spark.jars.packages", "io.dataflint:spark_2.12:0.3.2") \
     .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin") \
 ...
 ```
@@ -87,14 +87,14 @@ Alternatively, install DataFlint with **no code change** as a spark ivy package
 
 ```bash
 spark-submit
---packages io.dataflint:spark_2.12:0.3.1 \
+--packages io.dataflint:spark_2.12:0.3.2 \
 --conf spark.plugins=io.dataflint.spark.SparkDataflintPlugin \
 ...
 ```
 
 ### Additional installation options
 
-* There is also support for scala 2.13, if your spark cluster is using scala 2.13 change package name to io.dataflint:spark_**2.13**:0.3.1
+* There is also support for scala 2.13, if your spark cluster is using scala 2.13 change package name to io.dataflint:spark_**2.13**:0.3.2
 * For more installation options, including for **python** and **k8s spark-operator**, see [Install on Spark docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-spark)
 * For installing DataFlint in **spark history server** for observability on completed runs see [install on spark history server docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-spark-history-server)
 * For installing DataFlint on **DataBricks** see [install on databricks docs](https://dataflint.gitbook.io/dataflint-for-spark/getting-started/install-on-databricks)
````

spark-plugin/build.sbt

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 import xerial.sbt.Sonatype._
 
-lazy val versionNum: String = "0.3.1"
+lazy val versionNum: String = "0.3.2"
 lazy val scala212 = "2.12.18"
 lazy val scala213 = "2.13.12"
 lazy val supportedScalaVersions = List(scala212, scala213)
```

spark-ui/package-lock.json

Lines changed: 2 additions & 2 deletions
Generated file; diff not rendered.

spark-ui/package.json

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 {
   "name": "dataflint-ui",
-  "version": "0.3.1",
+  "version": "0.3.2",
   "homepage": "./",
   "private": true,
   "dependencies": {
```

spark-ui/src/components/SqlFlow/StageNode.tsx

Lines changed: 0 additions & 17 deletions

```diff
@@ -541,23 +541,6 @@ export const StageNode: FC<{
     }
   }
 
-  if (data.node.nodeName === "Exchange") {
-    const partitionsMetric = parseFloat(
-      data.node.metrics
-        .find((metric) => metric.name === "partitions")
-        ?.value?.replaceAll(",", "") ?? "0"
-    );
-    const shuffleWriteMetric = getSizeFromMetrics(data.node.metrics)
-
-    if (partitionsMetric && shuffleWriteMetric) {
-      const avgPartitionSize = shuffleWriteMetric / partitionsMetric;
-      const avgPartitionSizeString = humanFileSize(avgPartitionSize);
-      dataTable.push({
-        name: "Average Write Partition Size",
-        value: avgPartitionSizeString
-      });
-    }
-  }
 
   if (data.node.nodeName === "AQEShuffleRead") {
     const partitionsMetric = parseFloat(
```
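
For reference, the deleted "Exchange" branch computed the value it displayed as total shuffle write bytes divided by the partition count. A minimal TypeScript sketch of that arithmetic (not the repo's code; `humanFileSize` below is a simplified stand-in for the helper imported from `utils/FormatUtils`):

```typescript
// Simplified stand-in for the humanFileSize helper from utils/FormatUtils.
function humanFileSize(bytes: number): string {
  const units = ["B", "KiB", "MiB", "GiB", "TiB"];
  let i = 0;
  while (bytes >= 1024 && i < units.length - 1) {
    bytes /= 1024;
    i++;
  }
  return `${bytes.toFixed(1)} ${units[i]}`;
}

// Average write partition size = total shuffle write bytes / partition count.
function avgWritePartitionSize(
  shuffleWriteBytes: number,
  partitions: number
): string | undefined {
  // Mirrors the truthiness guard in the removed code: skip when either
  // metric is missing or zero.
  if (!partitions || !shuffleWriteBytes) return undefined;
  return humanFileSize(shuffleWriteBytes / partitions);
}

// Example: 64 GiB written across 200 partitions ≈ 327.7 MiB per partition.
console.log(avgWritePartitionSize(64 * 1024 ** 3, 200));
```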

spark-ui/src/reducers/Alerts/MaxPartitionToBigAlert.ts

Lines changed: 12 additions & 8 deletions

```diff
@@ -5,8 +5,8 @@ import {
 } from "../../interfaces/AppStore";
 import { humanFileSize } from "../../utils/FormatUtils";
 
-// 20GB threshold in bytes
-const MAX_PARTITION_SIZE_THRESHOLD = 1;
+// 5GB threshold in bytes
+const MAX_PARTITION_SIZE_THRESHOLD = 5 * 1024 * 1024 * 1024;
 
 export function reduceMaxPartitionToBigAlert(
   sql: SparkSQLStore,
@@ -53,13 +53,8 @@
   let maxPartitionSize = 0;
   let dataType = "";
 
-  if (stageData.shuffleWriteDistribution && stageData.shuffleWriteDistribution.length > 10 && stageData.shuffleWriteDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
-    // The last element (index 10) is the maximum value
-    maxPartitionSize = stageData.shuffleWriteDistribution[10];
-    dataType = "shuffle write";
-  }
   // If no shuffle write, check output distribution
-  else if (stageData.outputDistribution && stageData.outputDistribution.length > 10 && stageData.outputDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+  if (stageData.outputDistribution && stageData.outputDistribution.length > 10 && stageData.outputDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
     maxPartitionSize = stageData.outputDistribution[10];
     dataType = "output";
   }
@@ -68,6 +63,15 @@
     maxPartitionSize = stageData.inputDistribution[10];
     dataType = "input";
   }
+  else if (stageData.shuffleWriteDistribution && stageData.shuffleWriteDistribution.length > 10 && stageData.shuffleWriteDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+    // The last element (index 10) is the maximum value
+    maxPartitionSize = stageData.shuffleWriteDistribution[10];
+    dataType = "shuffle write";
+  } else if (stageData.shuffleReadDistribution && stageData.shuffleReadDistribution.length > 10 && stageData.shuffleReadDistribution[10] > MAX_PARTITION_SIZE_THRESHOLD) {
+    // The last element (index 10) is the maximum value
+    maxPartitionSize = stageData.shuffleReadDistribution[10];
+    dataType = "shuffle read";
+  }
 
   // If the maximum partition size exceeds our threshold, add an alert
   if (maxPartitionSize !== 0) {
```
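
Two things worth noting in this diff: the old constant was `1` byte even though its comment claimed 20GB, so the comparison triggered for any partition larger than one byte, while the new constant actually encodes 5 GiB (5 × 1024³ = 5,368,709,120 bytes). The rewritten branches also check the distributions in a new priority order: output, then input, then shuffle write, then shuffle read. A hedged TypeScript sketch of that logic follows; `StageLike` and `findOversizedPartition` are illustrative names, not the repo's real `AppStore` interfaces, and the shape assumption comes from the diff's own comments (each distribution is an 11-element array whose last entry, index 10, is the maximum):

```typescript
const MAX_PARTITION_SIZE_THRESHOLD = 5 * 1024 * 1024 * 1024; // 5 GiB = 5,368,709,120 bytes

// Hypothetical stand-in for the stage-data shape used by the alert reducer.
interface StageLike {
  outputDistribution?: number[];
  inputDistribution?: number[];
  shuffleWriteDistribution?: number[];
  shuffleReadDistribution?: number[];
}

function findOversizedPartition(
  stage: StageLike
): { dataType: string; maxPartitionSize: number } | undefined {
  // Same priority order as the rewritten branches: output, input,
  // shuffle write, shuffle read; the first oversized distribution wins.
  const checks: [string, number[] | undefined][] = [
    ["output", stage.outputDistribution],
    ["input", stage.inputDistribution],
    ["shuffle write", stage.shuffleWriteDistribution],
    ["shuffle read", stage.shuffleReadDistribution],
  ];
  for (const [dataType, dist] of checks) {
    // Index 10 (the last element) holds the maximum partition size.
    if (dist && dist.length > 10 && dist[10] > MAX_PARTITION_SIZE_THRESHOLD) {
      return { dataType, maxPartitionSize: dist[10] };
    }
  }
  return undefined; // no alert
}
```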
