Skip to content

Commit f21b9bc

Browse files
committed
apacheGH-38821: [C++] Strengthen handling of duplicate slashes in S3, GCS
Giving a path such as "bucket//key" to some S3 and GCS filesystem APIs could silently succeed or crash. Make sure those paths instead return an error, as other instances of duplicate slashes already do.
1 parent ef6ea6b commit f21b9bc

File tree

7 files changed

+72
-14
lines changed

7 files changed

+72
-14
lines changed

cpp/src/arrow/filesystem/filesystem_test.cc

+18-6
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,32 @@ TEST(PathUtil, GetAbstractPathParent) {
152152
AssertPairEqual(pair, {"abc", "def\\ghi"});
153153
}
154154

155-
TEST(PathUtil, ValidateAbstractPathParts) {
156-
ASSERT_OK(ValidateAbstractPathParts({}));
157-
ASSERT_OK(ValidateAbstractPathParts({"abc"}));
155+
TEST(PathUtil, ValidateAbstractPathPartsString) {
156+
ASSERT_OK(ValidateAbstractPathParts(""));
157+
ASSERT_OK(ValidateAbstractPathParts("abc"));
158+
ASSERT_OK(ValidateAbstractPathParts("abc/def"));
159+
ASSERT_OK(ValidateAbstractPathParts("abc/def.ghi"));
160+
ASSERT_OK(ValidateAbstractPathParts("abc/def\\ghi"));
161+
162+
// Extraneous separators
163+
ASSERT_RAISES(Invalid, ValidateAbstractPathParts("//"));
164+
ASSERT_RAISES(Invalid, ValidateAbstractPathParts("abc//def"));
165+
}
166+
167+
TEST(PathUtil, ValidateAbstractPathPartsVector) {
168+
ASSERT_OK(ValidateAbstractPathParts(std::vector<std::string>{}));
169+
ASSERT_OK(ValidateAbstractPathParts(std::vector<std::string>{"abc"}));
158170
ASSERT_OK(ValidateAbstractPathParts({"abc", "def"}));
159171
ASSERT_OK(ValidateAbstractPathParts({"abc", "def.ghi"}));
160172
ASSERT_OK(ValidateAbstractPathParts({"abc", "def\\ghi"}));
161173

162174
// Empty path component
163-
ASSERT_RAISES(Invalid, ValidateAbstractPathParts({""}));
175+
ASSERT_RAISES(Invalid, ValidateAbstractPathParts(std::vector<std::string>{""}));
164176
ASSERT_RAISES(Invalid, ValidateAbstractPathParts({"abc", "", "def"}));
165177

166178
// Separator in component
167-
ASSERT_RAISES(Invalid, ValidateAbstractPathParts({"/"}));
168-
ASSERT_RAISES(Invalid, ValidateAbstractPathParts({"abc/def"}));
179+
ASSERT_RAISES(Invalid, ValidateAbstractPathParts(std::vector<std::string>{"/"}));
180+
ASSERT_RAISES(Invalid, ValidateAbstractPathParts(std::vector<std::string>{"abc/def"}));
169181
}
170182

171183
TEST(PathUtil, ConcatAbstractPath) {

cpp/src/arrow/filesystem/gcsfs.cc

+9
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,18 @@ struct GcsPath {
7373
path.full_path = s;
7474
path.bucket = s.substr(0, first_sep);
7575
path.object = s.substr(first_sep + 1);
76+
RETURN_NOT_OK(Validate(path));
7677
return path;
7778
}
7879

80+
static Status Validate(const GcsPath& path) {
81+
auto st = internal::ValidateAbstractPathParts(path.full_path);
82+
if (!st.ok()) {
83+
return Status::Invalid(st.message(), " in path ", path.full_path);
84+
}
85+
return Status::OK();
86+
}
87+
7988
GcsPath parent() const {
8089
auto object_parent = internal::GetAbstractPathParent(object).first;
8190
if (object_parent.empty()) return GcsPath{bucket, bucket, ""};

cpp/src/arrow/filesystem/gcsfs_test.cc

+14
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,14 @@ TEST_F(GcsIntegrationTest, CreateDirUri) {
861861
ASSERT_RAISES(Invalid, fs->CreateDir("gs://" + RandomBucketName(), true));
862862
}
863863

864+
TEST_F(GcsIntegrationTest, CreateDirExtraneousSlashes) {
865+
auto fs = GcsFileSystem::Make(TestGcsOptions());
866+
ASSERT_RAISES(Invalid,
867+
fs->CreateDir(RandomBucketName() + "//somedir", /*recursive=*/true));
868+
ASSERT_RAISES(Invalid, fs->CreateDir(RandomBucketName() + "/somedir//newdir",
869+
/*recursive=*/true));
870+
}
871+
864872
TEST_F(GcsIntegrationTest, DeleteBucketDirSuccess) {
865873
auto fs = GcsFileSystem::Make(TestGcsOptions());
866874
ASSERT_OK(fs->CreateDir("pyarrow-filesystem/", /*recursive=*/true));
@@ -888,6 +896,12 @@ TEST_F(GcsIntegrationTest, DeleteDirUri) {
888896
ASSERT_RAISES(Invalid, fs->DeleteDir("gs://" + PreexistingBucketPath()));
889897
}
890898

899+
TEST_F(GcsIntegrationTest, DeleteDirExtraneousSlashes) {
900+
auto fs = GcsFileSystem::Make(TestGcsOptions());
901+
ASSERT_RAISES(Invalid, fs->DeleteDir(PreexistingBucketPath() + "/somedir"));
902+
ASSERT_RAISES(Invalid, fs->DeleteDir(PreexistingBucketPath() + "somedir//newdir"));
903+
}
904+
891905
TEST_F(GcsIntegrationTest, DeleteDirContentsSuccess) {
892906
auto fs = GcsFileSystem::Make(TestGcsOptions());
893907
ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs));

cpp/src/arrow/filesystem/path_util.cc

+12
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ Status ValidateAbstractPathParts(const std::vector<std::string>& parts) {
137137
return Status::OK();
138138
}
139139

140+
Status ValidateAbstractPathParts(std::string_view path) {
141+
auto pos = path.find_first_of(kSep);
142+
while (pos != path.npos) {
143+
++pos;
144+
if (path.length() > pos && path[pos] == kSep) {
145+
return Status::Invalid("Empty path component");
146+
}
147+
pos = path.find_first_of(kSep, pos);
148+
}
149+
return Status::OK();
150+
}
151+
140152
std::string ConcatAbstractPath(std::string_view base, std::string_view stem) {
141153
DCHECK(!stem.empty());
142154
if (base.empty()) {

cpp/src/arrow/filesystem/path_util.h

+4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s);
6767
ARROW_EXPORT
6868
Status ValidateAbstractPathParts(const std::vector<std::string>& parts);
6969

70+
// Validate the components of an abstract path.
71+
ARROW_EXPORT
72+
Status ValidateAbstractPathParts(std::string_view path);
73+
7074
// Append a non-empty stem to an abstract path.
7175
ARROW_EXPORT
7276
std::string ConcatAbstractPath(std::string_view base, std::string_view stem);

cpp/src/arrow/filesystem/s3fs.cc

+7-8
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ using internal::S3Backend;
154154
using internal::ToAwsString;
155155
using internal::ToURLEncodedAwsString;
156156

157-
static const char kSep = '/';
158-
constexpr char kAwsEndpointUrlEnvVar[] = "AWS_ENDPOINT_URL";
159-
constexpr char kAwsEndpointUrlS3EnvVar[] = "AWS_ENDPOINT_URL_S3";
157+
static constexpr const char kSep = '/';
158+
static constexpr const char kAwsEndpointUrlEnvVar[] = "AWS_ENDPOINT_URL";
159+
static constexpr const char kAwsEndpointUrlS3EnvVar[] = "AWS_ENDPOINT_URL_S3";
160160

161161
// -----------------------------------------------------------------------
162162
// S3ProxyOptions implementation
@@ -481,12 +481,11 @@ struct S3Path {
481481
}
482482

483483
static Status Validate(const S3Path& path) {
484-
auto result = internal::ValidateAbstractPathParts(path.key_parts);
485-
if (!result.ok()) {
486-
return Status::Invalid(result.message(), " in path ", path.full_path);
487-
} else {
488-
return result;
484+
auto st = internal::ValidateAbstractPathParts(path.full_path);
485+
if (!st.ok()) {
486+
return Status::Invalid(st.message(), " in path ", path.full_path);
489487
}
488+
return Status::OK();
490489
}
491490

492491
Aws::String ToAwsString() const {

cpp/src/arrow/filesystem/s3fs_test.cc

+8
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,10 @@ TEST_F(TestS3FS, CreateDir) {
935935

936936
// URI
937937
ASSERT_RAISES(Invalid, fs_->CreateDir("s3:bucket/newdir2"));
938+
939+
// Extraneous slashes
940+
ASSERT_RAISES(Invalid, fs_->CreateDir("bucket//somedir"));
941+
ASSERT_RAISES(Invalid, fs_->CreateDir("bucket/somedir//newdir"));
938942
}
939943

940944
TEST_F(TestS3FS, DeleteFile) {
@@ -994,6 +998,10 @@ TEST_F(TestS3FS, DeleteDir) {
994998

995999
// URI
9961000
ASSERT_RAISES(Invalid, fs_->DeleteDir("s3:empty-bucket"));
1001+
1002+
// Extraneous slashes
1003+
ASSERT_RAISES(Invalid, fs_->DeleteDir("bucket//newdir"));
1004+
ASSERT_RAISES(Invalid, fs_->DeleteDir("bucket/newdir//newsub"));
9971005
}
9981006

9991007
TEST_F(TestS3FS, DeleteDirContents) {

0 commit comments

Comments
 (0)