Skip to content

Commit

Permalink
DynamoDB runtime dependencies for Delta S3 multi-writer
Browse files Browse the repository at this point in the history
Delta has an "S3 multi-cluster" mode, which allows safe concurrent writes
by multiple loaders.

To enable this mode, users can set the Spark options documented by
Delta:

- `spark.delta.logStore.s3.impl=io.delta.storage.S3DynamoDBLogStore`
- `spark.io.delta.storage.S3DynamoDBLogStore.ddb.tableName=???`
- `spark.io.delta.storage.S3DynamoDBLogStore.ddb.region=???`
- (and some others)

This commit adds the necessary runtime dependencies for this mode.
  • Loading branch information
istreeter authored and oguzhanunlu committed Oct 28, 2024
1 parent 159afd6 commit e39d411
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,15 @@ object Dependencies {
}

// spark and hadoop
val delta = "io.delta" %% "delta-spark" % V.delta
val hudi = "org.apache.hudi" %% s"hudi-spark${V.Spark.forHudiMinor}-bundle" % V.hudi
val iceberg = "org.apache.iceberg" %% s"iceberg-spark-runtime-${V.Spark.forIcebergDeltaMinor}" % V.iceberg
val hadoopClient = "org.apache.hadoop" % "hadoop-client-runtime" % V.hadoop
val hadoopAzure = "org.apache.hadoop" % "hadoop-azure" % V.hadoop
val hadoopAws = "org.apache.hadoop" % "hadoop-aws" % V.hadoop
val gcsConnector = "com.google.cloud.bigdataoss" % "gcs-connector" % V.gcsConnector
val hiveCommon = "org.apache.hive" % "hive-common" % V.hive
val delta = "io.delta" %% "delta-spark" % V.delta
val deltaDynamodb = "io.delta" % "delta-storage-s3-dynamodb" % V.delta
val hudi = "org.apache.hudi" %% s"hudi-spark${V.Spark.forHudiMinor}-bundle" % V.hudi
val iceberg = "org.apache.iceberg" %% s"iceberg-spark-runtime-${V.Spark.forIcebergDeltaMinor}" % V.iceberg
val hadoopClient = "org.apache.hadoop" % "hadoop-client-runtime" % V.hadoop
val hadoopAzure = "org.apache.hadoop" % "hadoop-azure" % V.hadoop
val hadoopAws = "org.apache.hadoop" % "hadoop-aws" % V.hadoop
val gcsConnector = "com.google.cloud.bigdataoss" % "gcs-connector" % V.gcsConnector
val hiveCommon = "org.apache.hive" % "hive-common" % V.hive

val hudiAws = ("org.apache.hudi" % "hudi-aws" % V.hudiAws).excludeAll(ExclusionRule(organization = "org.apache.hudi"))

Expand All @@ -98,6 +99,7 @@ object Dependencies {
val awsS3 = "software.amazon.awssdk" % "s3" % V.awsSdk2
val awsS3Transfer = "software.amazon.awssdk" % "s3-transfer-manager" % V.awsSdk2
val awsSts = "software.amazon.awssdk" % "sts" % V.awsSdk2
val dynamodbSdk1 = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.awsSdk1
val awsRegistry = "software.amazon.glue" % "schema-registry-serde" % V.awsRegistry

// transitive overrides
Expand Down Expand Up @@ -162,8 +164,10 @@ object Dependencies {
awsCore, // Dependency on aws sdk v1 will likely be removed in the next release of hadoop-aws
awsS3,
awsGlue,
awsSts,
awsS3Transfer % Runtime,
awsSts
deltaDynamodb % Runtime,
dynamodbSdk1 % Runtime
) ++ commonRuntimeDependencies

val azureDependencies = Seq(
Expand Down

0 comments on commit e39d411

Please sign in to comment.