-
Notifications
You must be signed in to change notification settings - Fork 336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitino server and GVFS Java client #5508
[#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitino server and GVFS Java client #5508
Changes from all commits
5cb6f9c
8dc612d
1878930
21b4521
4370de0
63d99ce
2a3c73a
5ef2a2b
e59346c
a9c64f1
654ef06
5b6e880
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar | ||
|
||
// Plugins applied to this module: Maven publishing, the Java plugin, and the Shadow plugin (via the version-catalog alias). | ||
plugins { | ||
`maven-publish` | ||
id("java") | ||
alias(libs.plugins.shadow) | ||
} | ||
|
||
// Dependencies of this module. compileOnly entries are available at compile time only; they are not on | ||
// runtimeClasspath, which is the configuration the ShadowJar task in this script bundles. | ||
dependencies { | ||
compileOnly(project(":api")) | ||
compileOnly(project(":core")) | ||
compileOnly(project(":catalogs:catalog-hadoop")) | ||
|
||
compileOnly(libs.hadoop3.common) | ||
|
||
implementation(libs.commons.lang3) | ||
// runtime used | ||
implementation(libs.commons.logging) | ||
implementation(libs.hadoop3.abs) | ||
implementation(project(":catalogs:catalog-common")) { | ||
// NOTE(review): appears intended to drop all transitive dependencies of catalog-common — confirm. | ||
exclude("*") | ||
} | ||
} | ||
|
||
// Configure every ShadowJar task: bundle runtimeClasspath and publish the result without a classifier. | ||
tasks.withType(ShadowJar::class.java) { | ||
// Enable Zip64 so the archive is not limited by the classic zip format. | ||
isZip64 = true | ||
configurations = listOf(project.configurations.runtimeClasspath.get()) | ||
archiveClassifier.set("") | ||
|
||
// Relocate dependencies to avoid conflicts | ||
// NOTE(review): Shadow's relocate() matches Java package/class-name prefixes, not Maven group IDs. | ||
// "org.apache.httpcomponents" and "com.google.guava" look like group IDs (the corresponding packages are | ||
// org.apache.http and com.google.common), so these two rules probably relocate nothing. Confirm before | ||
// changing them: relocating Guava would also rewrite the references in this module's own classes. | ||
relocate("org.apache.httpcomponents", "org.apache.gravitino.azure.shaded.org.apache.httpcomponents") | ||
relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons") | ||
relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml") | ||
relocate("com.google.guava", "org.apache.gravitino.azure.shaded.com.google.guava") | ||
} | ||
|
||
// The plain jar runs after shadowJar and is given the "empty" classifier, so the classifier-less | ||
// artifact name is left to the shadow jar (whose classifier is set to "" in the ShadowJar configuration). | ||
tasks.jar { | ||
dependsOn(tasks.named("shadowJar")) | ||
archiveClassifier.set("empty") | ||
} | ||
|
||
// Compilation of this module waits for the runtimeJars task of :catalogs:catalog-hadoop. | ||
tasks.compileJava { | ||
dependsOn(":catalogs:catalog-hadoop:runtimeJars") | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.gravitino.abs.fs; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import com.google.common.collect.ImmutableMap; | ||
import java.io.IOException; | ||
import java.util.Map; | ||
import javax.annotation.Nonnull; | ||
import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider; | ||
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; | ||
import org.apache.gravitino.storage.ABSProperties; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
|
||
public class AzureFileSystemProvider implements FileSystemProvider { | ||
|
||
// URI scheme reported by this provider; returned by scheme(). | ||
@VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss"; | ||
|
||
// Name reported by this provider; returned by name(). | ||
@VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs"; | ||
|
||
// FileSystem implementation class that getFileSystem() registers under ABFS_IMPL_KEY by default. | ||
private static final String ABFS_IMPL = "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem"; | ||
|
||
// Hadoop configuration key that getFileSystem() sets to ABFS_IMPL when the supplied config does not contain it. | ||
private static final String ABFS_IMPL_KEY = "fs.abfss.impl"; | ||
|
||
@Override | ||
public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, String> config) | ||
throws IOException { | ||
Configuration configuration = new Configuration(); | ||
|
||
Map<String, String> hadoopConfMap = | ||
FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of()); | ||
|
||
if (config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME) | ||
&& config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) { | ||
hadoopConfMap.put( | ||
String.format( | ||
"fs.azure.account.key.%s.dfs.core.windows.net", | ||
config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)), | ||
config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)); | ||
} | ||
Comment on lines
+52
to
+59
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Can abfs work without these two configurations? If not, I think we should throw an exception here. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. If we let users pass other Azure configurations here, I think we should not throw an exception. For example, users can set SAS token and Azure Active Directory (AAD) related configuration through the bypass mechanism, though I'm not 100% sure whether users can successfully do so. |
||
|
||
if (!config.containsKey(ABFS_IMPL_KEY)) { | ||
configuration.set(ABFS_IMPL_KEY, ABFS_IMPL); | ||
} | ||
|
||
hadoopConfMap.forEach(configuration::set); | ||
|
||
return FileSystem.get(path.toUri(), configuration); | ||
} | ||
|
||
// Returns ABS_PROVIDER_SCHEME ("abfss"), the URI scheme reported by this provider. | ||
@Override | ||
public String scheme() { | ||
return ABS_PROVIDER_SCHEME; | ||
} | ||
|
||
// Returns ABS_PROVIDER_NAME ("abs"), the name reported by this provider. | ||
@Override | ||
public String name() { | ||
return ABS_PROVIDER_NAME; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
|
||
org.apache.gravitino.abs.fs.AzureFileSystemProvider |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.gravitino.storage; | ||
|
||
public class ABSProperties { | ||
|
||
// The account name of the Azure Blob Storage. | ||
public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name"; | ||
|
||
// The account key of the Azure Blob Storage. | ||
public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key"; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. So this is similar to AKSK in Azure Blob Storage? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Yes. Azure Blob Storage also supports two other authentication mechanisms, which are considerably more complex than the current one. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Can you please confirm whether this is a mainstream authentication method for ABS and whether it is also widely used by other systems? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Yes, like AKSK, it is the one most widely used by users and applications; the others, such as SAS tokens and Azure Active Directory (AAD), are quite complicated and hard to configure. |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Iceberg uses this protocol; however,
wasbs
is also used by several other systems, such as Dremio.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's support this first, then we can add more support later on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So later on, when we support wasb, can we still use this provider, or shall we create another one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I plan to introduce a scheme-alias method to support multiple protocols in one provider.
Set schemeAlias() {
Sets.of("wasb", "wasbs", "abfs");
}
If the scheme of the path is in this set, the path will also use this provider.