From 5cb6f9c4faecea77dc6b29f2496d9db35586da6b Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 7 Nov 2024 22:02:12 +0800 Subject: [PATCH 1/9] Support Azure block storage for Gravitino server and GVFS Java client --- LICENSE.bin | 1 + build.gradle.kts | 4 +- bundles/azure-bundle/build.gradle.kts | 62 ++++++ .../abs/fs/ABSFileSystemProvider.java | 69 ++++++ ...itino.catalog.hadoop.fs.FileSystemProvider | 20 ++ .../gravitino/storage/ABSProperties.java | 10 + catalogs/catalog-hadoop/build.gradle.kts | 1 + .../integration/test/HadoopABSCatalogIT.java | 197 ++++++++++++++++++ clients/filesystem-hadoop3/build.gradle.kts | 1 + .../test/GravitinoVirtualFileSystemABSIT.java | 163 +++++++++++++++ gradle/libs.versions.toml | 2 + settings.gradle.kts | 2 + 12 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 bundles/azure-bundle/build.gradle.kts create mode 100644 bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java create mode 100644 bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java create mode 100644 catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java create mode 100644 clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java diff --git a/LICENSE.bin b/LICENSE.bin index 738687a6a2..9ab5edbd6f 100644 --- a/LICENSE.bin +++ b/LICENSE.bin @@ -285,6 +285,7 @@ Apache Hadoop Aliyun connector Apache Hadoop GCS connector Apache Hadoop AWS connector + Apache Hadoop Azure connector Apache Hadoop Annotatations Apache Hadoop Auth Apache Hadoop Client Aggregator diff --git a/build.gradle.kts b/build.gradle.kts index e6c49df406..01f1043e99 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -766,7 +766,7 @@ tasks { 
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" && it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink") && it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") && - it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" + it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" ) { from(it.configurations.runtimeClasspath) into("distribution/package/libs") @@ -788,7 +788,7 @@ tasks { !it.name.startsWith("trino-connector") && it.name != "bundled-catalog" && it.name != "hive-metastore-common" && it.name != "gcp-bundle" && - it.name != "aliyun-bundle" && it.name != "aws-bundle" + it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" ) { dependsOn("${it.name}:build") from("${it.name}/build/libs") diff --git a/bundles/azure-bundle/build.gradle.kts b/bundles/azure-bundle/build.gradle.kts new file mode 100644 index 0000000000..fa6a68d1af --- /dev/null +++ b/bundles/azure-bundle/build.gradle.kts @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":api")) + compileOnly(project(":core")) + compileOnly(project(":catalogs:catalog-hadoop")) + + compileOnly(libs.hadoop3.common) + + implementation(libs.commons.lang3) + // runtime used + implementation(libs.commons.logging) + implementation(libs.hadoop3.abs) + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + + // Relocate dependencies to avoid conflicts + relocate("org.apache.httpcomponents", "org.apache.gravitino.azure.shaded.org.apache.httpcomponents") + relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons") + relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml") + relocate("com.google.guava", "org.apache.gravitino.azure.shaded.com.google.guava") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java new file mode 100644 index 0000000000..5654131ca4 --- /dev/null +++ b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.abs.fs; + +import com.google.common.collect.ImmutableMap; +import java.io.IOException; +import java.util.Map; +import javax.annotation.Nonnull; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; +import org.apache.gravitino.storage.ABSProperties; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class ABSFileSystemProvider implements FileSystemProvider { + + private static final String ABS_PROVIDER_SCHEME = "wasbs"; + public static final String ABS_PROVIDER_NAME = "abs"; + + @Override + public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map config) + throws IOException { + Configuration configuration = new Configuration(); + + Map hadoopConfMap = + FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of()); + + if (config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME) + && config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) { + hadoopConfMap.put( + String.format( + "fs.azure.account.key.%s.blob.core.windows.net", + config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)), + config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)); + } + + hadoopConfMap.forEach(configuration::set); + + return FileSystem.get(path.toUri(), configuration); + } + + @Override + public String 
scheme() { + return ABS_PROVIDER_SCHEME; + } + + @Override + public String name() { + return ABS_PROVIDER_NAME; + } +} diff --git a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider new file mode 100644 index 0000000000..20dad43fb0 --- /dev/null +++ b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.gravitino.abs.fs.ABSFileSystemProvider \ No newline at end of file diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java new file mode 100644 index 0000000000..e2e441d331 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java @@ -0,0 +1,10 @@ +package org.apache.gravitino.storage; + +public class ABSProperties { + + // The account name of the Azure Blob Storage. 
+ public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name"; + + // The account key of the Azure Blob Storage. + public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key"; +} diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index c925d1b92d..409a87fb10 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -80,6 +80,7 @@ dependencies { testImplementation(project(":bundles:aws-bundle")) testImplementation(project(":bundles:gcp-bundle")) testImplementation(project(":bundles:aliyun-bundle")) + testImplementation(project(":bundles:azure-bundle")) testImplementation(libs.minikdc) testImplementation(libs.hadoop3.minicluster) diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java new file mode 100644 index 0000000000..62718ce2f9 --- /dev/null +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.hadoop.integration.test; + +import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Schema; +import org.apache.gravitino.file.Fileset; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.gravitino.storage.ABSProperties; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIf; +import org.junit.platform.commons.util.StringUtils; + +@EnabledIf("absEnabled") +public class HadoopABSCatalogIT extends HadoopCatalogIT { + + public static final String ABS_ACCOUNT_NAME = System.getenv("ABS_ACCOUNT_NAME"); + public static final String ABS_ACCOUNT_KEY = System.getenv("ABS_ACCOUNT_KEY"); + public static final String ABS_CONTAINER_NAME = System.getenv("ABS_CONTAINER_NAME"); + + @Override + public void startIntegrationTest() throws Exception { + // Just overwrite super, do nothing. 
+ } + + @BeforeAll + public void setup() throws IOException { + copyBundleJarsToHadoop("azure-bundle"); + + try { + super.startIntegrationTest(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake"); + catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog"); + schemaName = GravitinoITUtils.genRandomName("CatalogFilesetIT_schema"); + + schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); + Configuration conf = new Configuration(); + + conf.set( + String.format("fs.azure.account.key.%s.blob.core.windows.net", ABS_ACCOUNT_NAME), + ABS_ACCOUNT_KEY); + + fileSystem = + FileSystem.get( + URI.create( + String.format( + "wasbs://%s@%s.blob.core.windows.net", ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME)), + conf); + + createMetalake(); + createCatalog(); + createSchema(); + } + + protected String defaultBaseLocation() { + if (defaultBaseLocation == null) { + try { + Path bucket = + new Path( + String.format( + "wasbs://%s@%s.blob.core.windows.net/%s", + ABS_CONTAINER_NAME, + ABS_ACCOUNT_NAME, + GravitinoITUtils.genRandomName("CatalogFilesetIT"))); + + if (!fileSystem.exists(bucket)) { + fileSystem.mkdirs(bucket); + } + + defaultBaseLocation = bucket.toString(); + } catch (IOException e) { + throw new RuntimeException("Failed to create default base location", e); + } + } + + return defaultBaseLocation; + } + + protected void createCatalog() { + Map map = Maps.newHashMap(); + map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); + map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); + map.put(FILESYSTEM_PROVIDERS, "abs"); + metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, "comment", map); + + catalog = metalake.loadCatalog(catalogName); + } + + protected String generateLocation(String filesetName) { + return String.format("%s/%s", defaultBaseLocation, filesetName); + } + + @Test + public void 
testCreateSchemaAndFilesetWithSpecialLocation() { + String localCatalogName = GravitinoITUtils.genRandomName("local_catalog"); + + String ossLocation = + String.format( + "wasbs://%s@%s.blob.core.windows.net/%s", + ABS_CONTAINER_NAME, + ABS_ACCOUNT_NAME, + GravitinoITUtils.genRandomName("CatalogCatalogIT")); + Map catalogProps = Maps.newHashMap(); + catalogProps.put("location", ossLocation); + catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); + catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); + catalogProps.put(FILESYSTEM_PROVIDERS, "abs"); + + Catalog localCatalog = + metalake.createCatalog( + localCatalogName, Catalog.Type.FILESET, provider, "comment", catalogProps); + Assertions.assertEquals(ossLocation, localCatalog.properties().get("location")); + + // Create schema without specifying location. + Schema localSchema = + localCatalog + .asSchemas() + .createSchema("local_schema", "comment", ImmutableMap.of("key1", "val1")); + + Fileset localFileset = + localCatalog + .asFilesetCatalog() + .createFileset( + NameIdentifier.of(localSchema.name(), "local_fileset"), + "fileset comment", + Fileset.Type.MANAGED, + null, + ImmutableMap.of("k1", "v1")); + Assertions.assertEquals( + ossLocation + "/local_schema/local_fileset", localFileset.storageLocation()); + + // Delete schema + localCatalog.asSchemas().dropSchema(localSchema.name(), true); + + // Create schema with specifying location. 
+ Map schemaProps = ImmutableMap.of("location", ossLocation); + Schema localSchema2 = + localCatalog.asSchemas().createSchema("local_schema2", "comment", schemaProps); + Assertions.assertEquals(ossLocation, localSchema2.properties().get("location")); + + Fileset localFileset2 = + localCatalog + .asFilesetCatalog() + .createFileset( + NameIdentifier.of(localSchema2.name(), "local_fileset2"), + "fileset comment", + Fileset.Type.MANAGED, + null, + ImmutableMap.of("k1", "v1")); + Assertions.assertEquals(ossLocation + "/local_fileset2", localFileset2.storageLocation()); + + // Delete schema + localCatalog.asSchemas().dropSchema(localSchema2.name(), true); + + // Delete catalog + metalake.dropCatalog(localCatalogName, true); + } + + private static boolean absEnabled() { + return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME")) + && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY")) + && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME")); + } +} diff --git a/clients/filesystem-hadoop3/build.gradle.kts b/clients/filesystem-hadoop3/build.gradle.kts index 9836c35147..55c0f59a05 100644 --- a/clients/filesystem-hadoop3/build.gradle.kts +++ b/clients/filesystem-hadoop3/build.gradle.kts @@ -45,6 +45,7 @@ dependencies { testImplementation(project(":bundles:gcp-bundle")) testImplementation(project(":bundles:aliyun-bundle")) testImplementation(project(":bundles:aws-bundle")) + testImplementation(project(":bundles:azure-bundle")) testImplementation(libs.awaitility) testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.jersey) diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java new file mode 100644 index 0000000000..9c5c194c85 --- /dev/null +++ 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.filesystem.hadoop.integration.test; + +import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.abs.fs.ABSFileSystemProvider; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.gravitino.storage.ABSProperties; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.condition.EnabledIf; +import org.junit.platform.commons.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + 
+@EnabledIf("absEnabled") +public class GravitinoVirtualFileSystemABSIT extends GravitinoVirtualFileSystemIT { + private static final Logger LOG = LoggerFactory.getLogger(GravitinoVirtualFileSystemABSIT.class); + + public static final String ABS_ACCOUNT_NAME = System.getenv("ABS_ACCOUNT_NAME"); + public static final String ABS_ACCOUNT_KEY = System.getenv("ABS_ACCOUNT_KEY"); + public static final String ABS_CONTAINER_NAME = System.getenv("ABS_CONTAINER_NAME"); + + @BeforeAll + public void startIntegrationTest() { + // Do nothing + } + + @BeforeAll + public void startUp() throws Exception { + // Copy the GCP jars to the gravitino server if in deploy mode. + copyBundleJarsToHadoop("azure-bundle"); + // Need to download jars to gravitino server + super.startIntegrationTest(); + + // This value can be by tune by the user, please change it accordingly. + defaultBockSize = 32 * 1024 * 1024; + + // This value is 1 for ABFS, 3 for GCS, and 1 for S3A. + defaultReplication = 1; + + metalakeName = GravitinoITUtils.genRandomName("gvfs_it_metalake"); + catalogName = GravitinoITUtils.genRandomName("catalog"); + schemaName = GravitinoITUtils.genRandomName("schema"); + + Assertions.assertFalse(client.metalakeExists(metalakeName)); + metalake = client.createMetalake(metalakeName, "metalake comment", Collections.emptyMap()); + Assertions.assertTrue(client.metalakeExists(metalakeName)); + + Map properties = Maps.newHashMap(); + + properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); + properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); + properties.put(FILESYSTEM_PROVIDERS, "abs"); + + Catalog catalog = + metalake.createCatalog( + catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment", properties); + Assertions.assertTrue(metalake.catalogExists(catalogName)); + + catalog.asSchemas().createSchema(schemaName, "schema comment", properties); + Assertions.assertTrue(catalog.asSchemas().schemaExists(schemaName)); + + 
conf.set("fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem"); + conf.set("fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs"); + conf.set("fs.gvfs.impl.disable.cache", "true"); + conf.set("fs.gravitino.server.uri", serverUri); + conf.set("fs.gravitino.client.metalake", metalakeName); + + conf.set("fs.gvfs.filesystem.providers", ABSFileSystemProvider.ABS_PROVIDER_NAME); + // Pass this configuration to the real file system + conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); + conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); + } + + @AfterAll + public void tearDown() throws IOException { + Catalog catalog = metalake.loadCatalog(catalogName); + catalog.asSchemas().dropSchema(schemaName, true); + metalake.dropCatalog(catalogName, true); + client.dropMetalake(metalakeName, true); + + if (client != null) { + client.close(); + client = null; + } + + try { + closer.close(); + } catch (Exception e) { + LOG.error("Exception in closing CloseableGroup", e); + } + } + + /** + * Remove the `gravitino.bypass` prefix from the configuration and pass it to the real file system + * This method corresponds to the method org.apache.gravitino.filesystem.hadoop + * .GravitinoVirtualFileSystem#getConfigMap(Configuration) in the original code. 
+ */ + protected Configuration convertGvfsConfigToRealFileSystemConfig(Configuration gvfsConf) { + Configuration absConf = new Configuration(); + Map map = Maps.newHashMap(); + + gvfsConf.forEach(entry -> map.put(entry.getKey(), entry.getValue())); + + Map hadoopConfMap = FileSystemUtils.toHadoopConfigMap(map, ImmutableMap.of()); + + if (gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME) != null + && gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY) != null) { + hadoopConfMap.put( + String.format( + "fs.azure.account.key.%s.blob.core.windows.net", + gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)), + gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)); + } + + hadoopConfMap.forEach(absConf::set); + + return absConf; + } + + protected String genStorageLocation(String fileset) { + return String.format( + "wasbs://%s@%s.blob.core.windows.net/%s", ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME, fileset); + } + + @Disabled("java.lang.UnsupportedOperationException: Append Support not enabled") + public void testAppend() throws IOException {} + + private static boolean absEnabled() { + return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME")) + && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY")) + && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME")); + } +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3241a48375..3a2ccb9d1d 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -35,6 +35,7 @@ hadoop2 = "2.10.2" hadoop3 = "3.1.0" hadoop3-gcs = "1.9.4-hadoop3" hadoop3-aliyun = "3.1.0" +hadoop3-abs = "3.1.0" hadoop-minikdc = "3.3.6" htrace-core4 = "4.1.0-incubating" httpclient5 = "5.2.1" @@ -169,6 +170,7 @@ hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version. 
hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", version.ref = "hadoop3-gcs"} hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", version.ref = "hadoop3-aliyun"} +hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure", version.ref = "hadoop3-abs"} htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} diff --git a/settings.gradle.kts b/settings.gradle.kts index 1f3efb4954..3b0d963659 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -74,3 +74,5 @@ include("integration-test-common") include(":bundles:aws-bundle") include(":bundles:gcp-bundle") include(":bundles:aliyun-bundle") +include("bundles:azure-bundle") +findProject(":bundles:azure-bundle")?.name = "azure-bundle" From 8dc612dbd8b7d3e4dce1b566a92200d1e8d5a638 Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 7 Nov 2024 22:25:09 +0800 Subject: [PATCH 2/9] fix rat problem. --- .../gravitino/storage/ABSProperties.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java index e2e441d331..a76ece32ba 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.gravitino.storage; public class ABSProperties { From 18789302a967b3257f8069c995c5ddfff058f9cc Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 11 Nov 2024 21:48:37 +0800 Subject: [PATCH 3/9] Update code. --- ...ider.java => AzureFileSystemProvider.java} | 18 ++++++++++--- ...itino.catalog.hadoop.fs.FileSystemProvider | 2 +- .../integration/test/HadoopABSCatalogIT.java | 27 ++++++++++--------- .../test/GravitinoVirtualFileSystemABSIT.java | 12 +++++---- docs/hadoop-catalog.md | 12 +++++++++ docs/how-to-use-gvfs.md | 11 ++++++++ gradle/libs.versions.toml | 2 +- 7 files changed, 61 insertions(+), 23 deletions(-) rename bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/{ABSFileSystemProvider.java => AzureFileSystemProvider.java} (77%) diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java similarity index 77% rename from bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java rename to bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java index 5654131ca4..d3f8d8db3a 100644 --- 
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/ABSFileSystemProvider.java +++ b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java @@ -19,6 +19,7 @@ package org.apache.gravitino.abs.fs; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.Map; @@ -30,10 +31,15 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -public class ABSFileSystemProvider implements FileSystemProvider { +public class AzureFileSystemProvider implements FileSystemProvider { - private static final String ABS_PROVIDER_SCHEME = "wasbs"; - public static final String ABS_PROVIDER_NAME = "abs"; + @VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss"; + + @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abfs"; + + private static final String ABFS_IMPL = "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem"; + + private static final String ABFS_IMPL_KEY = "fs.abfss.impl"; @Override public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map config) @@ -47,11 +53,15 @@ public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map && config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) { hadoopConfMap.put( String.format( - "fs.azure.account.key.%s.blob.core.windows.net", + "fs.azure.account.key.%s.dfs.core.windows.net", config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)), config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)); } + if (!config.containsKey(ABFS_IMPL_KEY)) { + configuration.set(ABFS_IMPL_KEY, ABFS_IMPL); + } + hadoopConfMap.forEach(configuration::set); return FileSystem.get(path.toUri(), configuration); diff --git a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider index 
20dad43fb0..ab864341cc 100644 --- a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider +++ b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider @@ -17,4 +17,4 @@ # under the License. # -org.apache.gravitino.abs.fs.ABSFileSystemProvider \ No newline at end of file +org.apache.gravitino.abs.fs.AzureFileSystemProvider \ No newline at end of file diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java index 62718ce2f9..b05baa7acf 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java @@ -28,6 +28,7 @@ import org.apache.gravitino.Catalog; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Schema; +import org.apache.gravitino.abs.fs.AzureFileSystemProvider; import org.apache.gravitino.file.Fileset; import org.apache.gravitino.integration.test.util.GravitinoITUtils; import org.apache.gravitino.storage.ABSProperties; @@ -43,9 +44,9 @@ @EnabledIf("absEnabled") public class HadoopABSCatalogIT extends HadoopCatalogIT { - public static final String ABS_ACCOUNT_NAME = System.getenv("ABS_ACCOUNT_NAME"); - public static final String ABS_ACCOUNT_KEY = System.getenv("ABS_ACCOUNT_KEY"); - public static final String ABS_CONTAINER_NAME = System.getenv("ABS_CONTAINER_NAME"); + public static final String ABS_ACCOUNT_NAME = System.getenv("ADLS_ACCOUNT_NAME"); + public static final String ABS_ACCOUNT_KEY = System.getenv("ADLS_ACCOUNT_KEY"); + public static final String ABS_CONTAINER_NAME = System.getenv("ADLS_CONTAINER_NAME"); @Override public void 
startIntegrationTest() throws Exception { @@ -70,14 +71,14 @@ public void setup() throws IOException { Configuration conf = new Configuration(); conf.set( - String.format("fs.azure.account.key.%s.blob.core.windows.net", ABS_ACCOUNT_NAME), + String.format("fs.azure.account.key.%s.dfs.core.windows.net", ABS_ACCOUNT_NAME), ABS_ACCOUNT_KEY); fileSystem = FileSystem.get( URI.create( String.format( - "wasbs://%s@%s.blob.core.windows.net", ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME)), + "abfs://%s@%s.dfs.core.windows.net", ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME)), conf); createMetalake(); @@ -91,7 +92,8 @@ protected String defaultBaseLocation() { Path bucket = new Path( String.format( - "wasbs://%s@%s.blob.core.windows.net/%s", + "%s://%s@%s.dfs.core.windows.net/%s", + AzureFileSystemProvider.ABS_PROVIDER_SCHEME, ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME, GravitinoITUtils.genRandomName("CatalogFilesetIT"))); @@ -113,7 +115,7 @@ protected void createCatalog() { Map map = Maps.newHashMap(); map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); - map.put(FILESYSTEM_PROVIDERS, "abs"); + map.put(FILESYSTEM_PROVIDERS, AzureFileSystemProvider.ABS_PROVIDER_NAME); metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, "comment", map); catalog = metalake.loadCatalog(catalogName); @@ -129,7 +131,8 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() { String ossLocation = String.format( - "wasbs://%s@%s.blob.core.windows.net/%s", + "%s://%s@%s.dfs.core.windows.net/%s", + AzureFileSystemProvider.ABS_PROVIDER_SCHEME, ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME, GravitinoITUtils.genRandomName("CatalogCatalogIT")); @@ -137,7 +140,7 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() { catalogProps.put("location", ossLocation); catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); - 
catalogProps.put(FILESYSTEM_PROVIDERS, "abs"); + catalogProps.put(FILESYSTEM_PROVIDERS, AzureFileSystemProvider.ABS_PROVIDER_NAME); Catalog localCatalog = metalake.createCatalog( @@ -190,8 +193,8 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() { } private static boolean absEnabled() { - return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME")) - && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY")) - && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME")); + return StringUtils.isNotBlank(System.getenv("ADLS_ACCOUNT_NAME")) + && StringUtils.isNotBlank(System.getenv("ADLS_ACCOUNT_KEY")) + && StringUtils.isNotBlank(System.getenv("ADLS_CONTAINER_NAME")); } } diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java index 9c5c194c85..598656b122 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java @@ -27,7 +27,7 @@ import java.util.Collections; import java.util.Map; import org.apache.gravitino.Catalog; -import org.apache.gravitino.abs.fs.ABSFileSystemProvider; +import org.apache.gravitino.abs.fs.AzureFileSystemProvider; import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; import org.apache.gravitino.integration.test.util.GravitinoITUtils; import org.apache.gravitino.storage.ABSProperties; @@ -79,7 +79,7 @@ public void startUp() throws Exception { properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); - properties.put(FILESYSTEM_PROVIDERS, "abs"); + 
properties.put(FILESYSTEM_PROVIDERS, AzureFileSystemProvider.ABS_PROVIDER_NAME); Catalog catalog = metalake.createCatalog( @@ -95,10 +95,11 @@ public void startUp() throws Exception { conf.set("fs.gravitino.server.uri", serverUri); conf.set("fs.gravitino.client.metalake", metalakeName); - conf.set("fs.gvfs.filesystem.providers", ABSFileSystemProvider.ABS_PROVIDER_NAME); + conf.set("fs.gvfs.filesystem.providers", AzureFileSystemProvider.ABS_PROVIDER_NAME); // Pass this configuration to the real file system conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME); conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY); + conf.set("fs.abfss.impl", "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem"); } @AfterAll @@ -137,7 +138,7 @@ protected Configuration convertGvfsConfigToRealFileSystemConfig(Configuration gv && gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY) != null) { hadoopConfMap.put( String.format( - "fs.azure.account.key.%s.blob.core.windows.net", + "fs.azure.account.key.%s.dfs.core.windows.net", gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)), gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)); } @@ -149,7 +150,8 @@ protected Configuration convertGvfsConfigToRealFileSystemConfig(Configuration gv protected String genStorageLocation(String fileset) { return String.format( - "wasbs://%s@%s.blob.core.windows.net/%s", ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME, fileset); + "%s://%s@%s.dfs.core.windows.net/%s", + AzureFileSystemProvider.ABS_PROVIDER_SCHEME, ABS_CONTAINER_NAME, ABS_ACCOUNT_NAME, fileset); } @Disabled("java.lang.UnsupportedOperationException: Append Support not enabled") diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index 0622574d4d..70201509ec 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -76,6 +76,18 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp- In the meantime, you need to place the corresponding bundle jar 
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. + +#### Azure Blob Storage fileset + +| Configuration item | Description | Default value | Required | Since version | +|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| +| `filesystem-providers` | The file system providers to add. Set it to `abfss` if it's a Azure block storage fileset, or a comma separated string that contains `abfss` like `oss,abfss,s3` to support multiple kinds of fileset including `abfss`. | (none) | Yes | 0.8.0-incubating | +| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for OSS, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | + +Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. + :::note - Gravitino contains builtin file system providers for local file system(`builtin-local`) and HDFS(`builtin-hdfs`), that is to say if `filesystem-providers` is not set, Gravitino will still support local file system and HDFS. 
Apart from that, you can set the `filesystem-providers` to support other file systems like S3, GCS, OSS or custom file system. - `default-filesystem-provider` is used to set the default file system provider for the Hadoop catalog. If the user does not specify the scheme in the URI, Gravitino will use the default file system provider to access the fileset. For example, if the default file system provider is set to `builtin-local`, the user can omit the prefix `file://` in the location. diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 7a3373092c..41237688c5 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -102,6 +102,17 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp- In the meantime, you need to place the corresponding bundle jar [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). +#### Azure blob storage fileset + +| Configuration item | Description | Default value | Required | Since version | +|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| +| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abfss` if it's a Azure block storage fileset, or a comma separated string that contains `abfss` like `oss,abfss,s3` to support multiple kinds of fileset including `abfss`. | (none) | Yes | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. 
| (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | + +Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). + + #### Custom fileset Since 0.7.0-incubating, users can define their own fileset type and configure the corresponding properties, for more, please refer to [Custom Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset). So, if you want to access the custom fileset through GVFS, you need to configure the corresponding properties. diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3a2ccb9d1d..a217b20bc1 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -35,7 +35,7 @@ hadoop2 = "2.10.2" hadoop3 = "3.1.0" hadoop3-gcs = "1.9.4-hadoop3" hadoop3-aliyun = "3.1.0" -hadoop3-abs = "3.1.0" +hadoop3-abs = "3.2.1" hadoop-minikdc = "3.3.6" htrace-core4 = "4.1.0-incubating" httpclient5 = "5.2.1" From 63d99ce43c1cb9cd7c55790416d217d3df234e04 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 Nov 2024 21:19:19 +0800 Subject: [PATCH 4/9] Fix --- .../integration/test/HadoopABSCatalogIT.java | 16 ++++++++-------- .../test/GravitinoVirtualFileSystemABSIT.java | 4 ++-- docs/hadoop-catalog.md | 2 +- docs/how-to-use-gvfs.md | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java index b05baa7acf..0da915a7d4 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java +++ 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java @@ -41,12 +41,12 @@ import org.junit.jupiter.api.condition.EnabledIf; import org.junit.platform.commons.util.StringUtils; -@EnabledIf("absEnabled") +@EnabledIf("absIsConfigured") public class HadoopABSCatalogIT extends HadoopCatalogIT { - public static final String ABS_ACCOUNT_NAME = System.getenv("ADLS_ACCOUNT_NAME"); - public static final String ABS_ACCOUNT_KEY = System.getenv("ADLS_ACCOUNT_KEY"); - public static final String ABS_CONTAINER_NAME = System.getenv("ADLS_CONTAINER_NAME"); + public static final String ABS_ACCOUNT_NAME = System.getenv("ABS_ACCOUNT_NAME"); + public static final String ABS_ACCOUNT_KEY = System.getenv("ABS_ACCOUNT_KEY"); + public static final String ABS_CONTAINER_NAME = System.getenv("ABS_CONTAINER_NAME"); @Override public void startIntegrationTest() throws Exception { @@ -192,9 +192,9 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() { metalake.dropCatalog(localCatalogName, true); } - private static boolean absEnabled() { - return StringUtils.isNotBlank(System.getenv("ADLS_ACCOUNT_NAME")) - && StringUtils.isNotBlank(System.getenv("ADLS_ACCOUNT_KEY")) - && StringUtils.isNotBlank(System.getenv("ADLS_CONTAINER_NAME")); + private static boolean absIsConfigured() { + return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME")) + && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY")) + && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME")); } } diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java index 598656b122..a6ad8d89ed 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java +++ 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java @@ -41,7 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@EnabledIf("absEnabled") +@EnabledIf("absIsConfigured") public class GravitinoVirtualFileSystemABSIT extends GravitinoVirtualFileSystemIT { private static final Logger LOG = LoggerFactory.getLogger(GravitinoVirtualFileSystemABSIT.class); @@ -157,7 +157,7 @@ protected String genStorageLocation(String fileset) { @Disabled("java.lang.UnsupportedOperationException: Append Support not enabled") public void testAppend() throws IOException {} - private static boolean absEnabled() { + private static boolean absIsConfigured() { return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME")) && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY")) && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME")); diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index 70201509ec..43613b2a86 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -81,7 +81,7 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy | Configuration item | Description | Default value | Required | Since version | |-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `filesystem-providers` | The file system providers to add. Set it to `abfss` if it's a Azure block storage fileset, or a comma separated string that contains `abfss` like `oss,abfss,s3` to support multiple kinds of fileset including `abfss`. | (none) | Yes | 0.8.0-incubating | +| `filesystem-providers` | The file system providers to add. 
Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | | `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for OSS, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | | `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | | `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 41237688c5..3a645c26f0 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -104,11 +104,11 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy #### Azure blob storage fileset -| Configuration item | Description | Default value | Required | Since version | -|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abfss` if it's a Azure block storage fileset, or a comma separated string that contains `abfss` like `oss,abfss,s3` to support multiple kinds of fileset including `abfss`. | (none) | Yes | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. 
| (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------| +| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). 
From 5ef2a2b84675c232217e372cde16ad1f199f784a Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 Nov 2024 21:49:49 +0800 Subject: [PATCH 5/9] Fix --- docs/hadoop-catalog.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index 43613b2a86..814a817f97 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -79,12 +79,12 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy #### Azure Blob Storage fileset -| Configuration item | Description | Default value | Required | Since version | -|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `filesystem-providers` | The file system providers to add. Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | -| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for OSS, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. 
| 0.8.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| +| `filesystem-providers` | The file system providers to add. Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | +| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for Azure block store, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. 
From e59346ceaa6378458b3fbe897a43a66a91bcb374 Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 14 Nov 2024 10:15:15 +0800 Subject: [PATCH 6/9] Fix --- .../integration/test/GravitinoVirtualFileSystemABSIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java index a6ad8d89ed..a25c17397c 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java @@ -56,7 +56,7 @@ public void startIntegrationTest() { @BeforeAll public void startUp() throws Exception { - // Copy the GCP jars to the gravitino server if in deploy mode. + // Copy the Azure jars to the gravitino server if in deploy mode. copyBundleJarsToHadoop("azure-bundle"); // Need to download jars to gravitino server super.startIntegrationTest(); From a9c64f1238cb62c16e8da972977784da14c2e11a Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 14 Nov 2024 12:07:36 +0800 Subject: [PATCH 7/9] Update docs and fix minor mistakes. 
--- .../gravitino/abs/fs/AzureFileSystemProvider.java | 2 +- docs/hadoop-catalog.md | 12 ++++++------ docs/how-to-use-gvfs.md | 10 +++++----- settings.gradle.kts | 1 - 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java index d3f8d8db3a..cad38e14c9 100644 --- a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java +++ b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java @@ -35,7 +35,7 @@ public class AzureFileSystemProvider implements FileSystemProvider { @VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss"; - @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abfs"; + @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs"; private static final String ABFS_IMPL = "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem"; diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index 814a817f97..db54c94174 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -79,12 +79,12 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy #### Azure Blob Storage fileset -| Configuration item | Description | Default value | Required | Since version | -|-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `filesystem-providers` | The file system providers to add. 
Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | -| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for Azure block store, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| +| `filesystem-providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abfs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | +| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for Azure blob storage, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. 
| 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 3a645c26f0..57129ef785 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -104,11 +104,11 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy #### Azure blob storage fileset -| Configuration item | Description | Default value | Required | Since version | -|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------| -| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abfs` if it's a Azure block storage fileset, or a comma separated string that contains `abfs` like `oss,abfs,s3` to support multiple kinds of fileset including `abfs`. | (none) | Yes | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. 
| 0.8.0-incubating | +| Configuration item | Description | Default value | Required | Since version | +|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------| +| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). 
diff --git a/settings.gradle.kts b/settings.gradle.kts index 3b0d963659..2cde39c222 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -75,4 +75,3 @@ include(":bundles:aws-bundle") include(":bundles:gcp-bundle") include(":bundles:aliyun-bundle") include("bundles:azure-bundle") -findProject(":bundles:azure-bundle")?.name = "azure-bundle" From 654ef061d3e1ce1e24d207f7b9e899bee74e7a29 Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 14 Nov 2024 12:14:19 +0800 Subject: [PATCH 8/9] update docs --- .../integration/test/GravitinoVirtualFileSystemABSIT.java | 2 +- docs/hadoop-catalog.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java index a25c17397c..cc16ce920a 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java @@ -64,7 +64,7 @@ public void startUp() throws Exception { // This value can be by tune by the user, please change it accordingly. defaultBockSize = 32 * 1024 * 1024; - // This value is 1 for ABFS, 3 for GCS, and 1 for S3A. + // This value is 1 for ABS, 3 for GCS, and 1 for S3A. 
defaultReplication = 1; metalakeName = GravitinoITUtils.genRandomName("gvfs_it_metalake"); diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index db54c94174..9dfa567363 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -81,7 +81,7 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy | Configuration item | Description | Default value | Required | Since version | |-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `filesystem-providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abfs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | +| `filesystem-providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | | `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for Azure blob storage, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | | `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | | `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. 
| 0.8.0-incubating | From 5b6e88022c6d61c73f119e2d24bc83f1b319bf7f Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 14 Nov 2024 20:50:54 +0800 Subject: [PATCH 9/9] update docs --- docs/hadoop-catalog.md | 8 ++++---- docs/how-to-use-gvfs.md | 11 ++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index 9dfa567363..f0fb9bb171 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -81,10 +81,10 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy | Configuration item | Description | Default value | Required | Since version | |-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------| -| `filesystem-providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | -| `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for Azure blob storage, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `filesystem-providers` | The file system providers to add. 
Set it to `abs` if it's an Azure Blob Storage fileset, or a comma-separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | +| `default-filesystem-provider` | The name of the default filesystem provider of this Hadoop catalog if users do not specify the scheme in the URI. The default value is `builtin-local`. For Azure Blob Storage, if we set this value, we can omit the prefix 'abfss://' in the location. | `builtin-local` | No | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure Blob Storage. | (none) | Yes if it's an Azure Blob Storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure Blob Storage. | (none) | Yes if it's an Azure Blob Storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 57129ef785..6ea3a972d0 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -102,17 +102,16 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp- In the meantime, you need to place the corresponding bundle jar [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). 
-#### Azure blob storage fileset +#### Azure Blob Storage fileset | Configuration item | Description | Default value | Required | Since version | |--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------| -| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abs` if it's a Azure blob storage fileset, or a comma separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | -| `abs-account-name` | The account name of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | -| `abs-account-key` | The account key of Azure blob storage. | (none) | Yes if it's a Azure blob storage fileset. | 0.8.0-incubating | +| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to `abs` if it's an Azure Blob Storage fileset, or a comma-separated string that contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including `abs`. | (none) | Yes | 0.8.0-incubating | +| `abs-account-name` | The account name of Azure Blob Storage. | (none) | Yes if it's an Azure Blob Storage fileset. | 0.8.0-incubating | +| `abs-account-key` | The account key of Azure Blob Storage. | (none) | Yes if it's an Azure Blob Storage fileset. | 0.8.0-incubating | Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). 
- #### Custom fileset Since 0.7.0-incubating, users can define their own fileset type and configure the corresponding properties, for more, please refer to [Custom Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset). So, if you want to access the custom fileset through GVFS, you need to configure the corresponding properties. @@ -122,8 +121,6 @@ So, if you want to access the custom fileset through GVFS, you need to configure | `fs.gvfs.filesystem.providers` | The file system providers. please set it to the value of `YourCustomFileSystemProvider#name` | (none) | Yes | 0.7.0-incubating | | `your-custom-properties` | The properties will be used to create a FileSystem instance in `CustomFileSystemProvider#getFileSystem` | (none) | No | - | - - You can configure these properties in two ways: 1. Before obtaining the `FileSystem` in the code, construct a `Configuration` object and set its properties: