From 8e7ec5aee133f643b76c0d08004471c5551b7411 Mon Sep 17 00:00:00 2001 From: beat4ocean Date: Wed, 16 Apr 2025 22:59:53 +0800 Subject: [PATCH] [Improvement][Engine] Upgrade to Spark 3.4.4 --- README.md | 2 +- README.zh-CN.md | 2 +- .../livy/executor/parameter/SparkVersion.java | 6 ++-- .../datavines-engine-livy/pom.xml | 6 ++-- .../datavines-engine-spark-api/pom.xml | 29 ++++--------------- .../spark/executor/SparkEngineExecutor.java | 9 +++--- .../executor/parameter/SparkVersion.java | 6 ++-- .../datavines-engine-spark/pom.xml | 28 ++++-------------- pom.xml | 7 +++-- 9 files changed, 29 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 61b49152f..edba489d8 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ The platform is based on plug-in design, and the following modules support user- - **Data Source**: `MySQL`, `Impala`, `StarRocks`, `Doris`, `Presto`, `Trino`, `ClickHouse`, `PostgreSQL` are already supported - **Check Rules**: 27 check rules such as built-in null value check, non-null check, enumeration check, etc. -- **Job Execution Engine**: Two execution engines `Spark` and `Local` have been supported. The `Spark` engine currently only supports the `Spark2.4` version, and the `Local` engine is a local execution engine developed based on `JDBC`, without relying on other execution engines. +- **Job Execution Engine**: Two execution engines `Spark` and `Local` have been supported. The `Spark` engine currently supports the `Spark3.4.x` version, and the `Local` engine is a local execution engine developed based on `JDBC`, without relying on other execution engines. - **Alert Channel**: Supported **Email** - **Error Data Storage**: `MySQL` and **local files** are already supported (only `Local` execution engine is supported) - **Registry**: Already supports `MySQL`, `PostgreSQL` and `ZooKeeper` diff --git a/README.zh-CN.md b/README.zh-CN.md index 57b8144c0..080a0329e 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -72,7 +72,7 @@ $ mvn clean package -Prelease -DskipTests - **数据源**:已支持 `MySQL`、`Impala`、`StarRocks`、`Doris`、`Presto`、`Trino`、`ClickHouse`、`PostgreSQL` - **检查规则**:内置空值检查、非空检查、枚举检查等27个检查规则 -- **作业执行引擎**:已支持`Spark`和`Local`两种执行引擎。`Spark `引擎目前仅支持`Spark2.4`版本,`Local` 引擎则是基于`JDBC`开发的本地执行引擎,无需依赖其他执行引擎。 +- **作业执行引擎**:已支持`Spark`和`Local`两种执行引擎。`Spark `引擎目前支持`Spark3.4.x`版本,`Local` 引擎则是基于`JDBC`开发的本地执行引擎,无需依赖其他执行引擎。 - **告警通道**:已支持**邮件** - **错误数据存储**:已支持 `MySQL` 和 **本地文件**(仅支持`Local`执行引擎) - **注册中心**:已支持 `MySQL`、`PostgreSQL` 和 `ZooKeeper` diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-livy/datavines-engine-livy-executor/src/main/java/io/datavines/engine/livy/executor/parameter/SparkVersion.java b/datavines-engine/datavines-engine-plugins/datavines-engine-livy/datavines-engine-livy-executor/src/main/java/io/datavines/engine/livy/executor/parameter/SparkVersion.java index d82bbcdcf..86282e9ba 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-livy/datavines-engine-livy-executor/src/main/java/io/datavines/engine/livy/executor/parameter/SparkVersion.java +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-livy/datavines-engine-livy-executor/src/main/java/io/datavines/engine/livy/executor/parameter/SparkVersion.java @@ -19,11 +19,9 @@ public enum SparkVersion { /** - * 0 SPARK1 - * 1 SPARK2 + * 0 SPARK3 */ - SPARK2(0, "SPARK2"), - SPARK3(1, "SPARK3"); + SPARK3(0, "SPARK3"); SparkVersion(int code, String description) { this.code = code; diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-livy/pom.xml b/datavines-engine/datavines-engine-plugins/datavines-engine-livy/pom.xml index 7552052d1..11ec11028 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-livy/pom.xml +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-livy/pom.xml @@ -34,9 +34,9 @@ - 2.11 - 2.4.0 - 2.9.0 + 2.12 + 3.4.4 + 2.14.2 provided diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-api/pom.xml b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-api/pom.xml index 5357a13f0..a457fedb9 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-api/pom.xml +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-api/pom.xml @@ -32,9 +32,9 @@ datavines-engine-spark-api - 2.11 - 2.4.0 - 2.9.0 + 2.12 + 3.4.4 + 2.14.2 provided @@ -43,6 +43,7 @@ org.apache.spark spark-streaming_${scala.binary.version} + ${spark.version} ${scope} @@ -53,7 +54,7 @@ ${scope} - jackson-module-scala_2.11 + jackson-module-scala_${scala.binary.version} com.fasterxml.jackson.module @@ -76,24 +77,6 @@ org.apache.spark spark-hive_${scala.binary.version} ${spark.version} - - - commons-httpclient - commons-httpclient - - - org.apache.httpcomponents - httpclient - - - jackson-core-asl - org.codehaus.jackson - - - jackson-mapper-asl - org.codehaus.jackson - - ${scope} @@ -112,7 +95,7 @@ com.fasterxml.jackson.module - jackson-module-scala_2.11 + jackson-module-scala_${scala.binary.version} ${jackson.version} ${scope} diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/SparkEngineExecutor.java b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/SparkEngineExecutor.java index ba96b6a35..1124c06b4 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/SparkEngineExecutor.java +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/SparkEngineExecutor.java @@ -37,13 +37,14 @@ import io.datavines.engine.executor.core.executor.ShellCommandProcess; import io.datavines.engine.spark.executor.parameter.SparkArgsUtils; import io.datavines.engine.spark.executor.parameter.SparkParameters; +import io.datavines.engine.spark.executor.parameter.SparkVersion; public class SparkEngineExecutor extends AbstractYarnEngineExecutor { /** - * spark2 command + * spark command */ - private static final String SPARK2_COMMAND = "${SPARK_HOME2}/bin/spark-submit"; + private static final String SPARK_COMMAND = "${SPARK_HOME}/bin/spark-submit"; private Configurations configurations; @@ -64,7 +65,7 @@ public void init(JobExecutionRequest jobExecutionRequest, Logger logger, Configu public void execute() throws Exception { try { this.processResult = shellCommandProcess.run(buildCommand()); - logger.info("process result: "+ JSONUtils.toJsonString(this.processResult)); + logger.info("process result: {}", JSONUtils.toJsonString(this.processResult)); } catch (Exception e) { logger.error("yarn process failure", e); throw e; @@ -139,7 +140,7 @@ protected String buildCommand() { List args = new ArrayList<>(); - args.add(SPARK2_COMMAND); + args.add(SPARK_COMMAND); args.addAll(SparkArgsUtils.buildArgs(sparkParameters)); diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/parameter/SparkVersion.java b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/parameter/SparkVersion.java index d27695058..fa6b16683 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/parameter/SparkVersion.java +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/datavines-engine-spark-executor/src/main/java/io/datavines/engine/spark/executor/parameter/SparkVersion.java @@ -19,11 +19,9 @@ public enum SparkVersion { /** - * 0 SPARK1 - * 1 SPARK2 + * 0 SPARK3 */ - SPARK2(0, "SPARK2"), - SPARK3(1, "SPARK3"); + SPARK3(0, "SPARK3"); SparkVersion(int code, String description) { this.code = code; diff --git a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/pom.xml b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/pom.xml index 5877e34ad..73b84fe89 100644 --- a/datavines-engine/datavines-engine-plugins/datavines-engine-spark/pom.xml +++ b/datavines-engine/datavines-engine-plugins/datavines-engine-spark/pom.xml @@ -42,9 +42,9 @@ - 2.11 - 2.4.0 - 2.9.0 + 2.12 + 3.4.4 + 2.14.2 provided @@ -64,7 +64,7 @@ ${spark.scope} - jackson-module-scala_2.11 + jackson-module-scala_${scala.binary.version} com.fasterxml.jackson.module @@ -86,7 +86,7 @@ org.mongodb.spark mongo-spark-connector_${scala.binary.version} - ${spark.version} + 10.2.0 ${spark.scope} @@ -94,24 +94,6 @@ org.apache.spark spark-hive_${scala.binary.version} ${spark.version} - - - commons-httpclient - commons-httpclient - - - org.apache.httpcomponents - httpclient - - - jackson-core-asl - org.codehaus.jackson - - - jackson-mapper-asl - org.codehaus.jackson - - ${spark.scope} diff --git a/pom.xml b/pom.xml index 4d0df9170..5e7a9d8c9 100644 --- a/pom.xml +++ b/pom.xml @@ -49,13 +49,13 @@ 1.0.0 1.7.25 1.7.25 - 2.12.5 + 2.15.2 5.8.7 4.1 20.0 1.8 - 2.11.8 - 2.11 + 2.12.15 + 2.12 2.7.5 8.4.0 false @@ -113,6 +113,7 @@ 1.6.2 19.3.0.0 12.1.0.2.0 + 10.4.1 0.20.0 2.0.1