Skip to content

Commit cee5fbd

Browse files
Copilot and otendolkar committed
Optimize blobFS folder enumeration to skip GetProperties calls
When isDFS is true (HNS-enabled accounts), optimize the blob traverser to:

1. Skip GetProperties calls for virtual directories - they're always real folders in HNS accounts.
2. Skip checking blob metadata for hdi_isfolder - folders are represented as virtual directories, not as blobs.

This significantly improves enumeration performance for blobFS to blobFS transfers with large folder structures by eliminating many additional API calls.

Co-authored-by: otendolkar <[email protected]>
1 parent f15269e commit cee5fbd

File tree

1 file changed

+71
-43
lines changed

1 file changed

+71
-43
lines changed

cmd/zc_traverser_blob.go

Lines changed: 71 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -442,59 +442,83 @@ func (t *blobTraverser) parallelList(containerClient *container.Client, containe
442442
enqueuedDirAsOutput := false // Reset the flag for each directory processed
443443

444444
if t.include.DirStubs() || t.includeDirectoryOrPrefix {
445-
// try to get properties on the directory itself, since it's not listed in BlobItems
446445
dName := strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)
447-
blobClient := containerClient.NewBlobClient(dName)
448-
altNameCheck:
449-
pResp, err := blobClient.GetProperties(t.ctx, nil)
450-
if err == nil {
451-
if !t.doesBlobRepresentAFolder(pResp.Metadata) { // We've picked up on a file *named* the folder, not the folder itself. Does folder/ exist?
452-
if !strings.HasSuffix(dName, "/") {
453-
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
454-
dName += common.AZCOPY_PATH_SEPARATOR_STRING
455-
goto altNameCheck // "foo" is a file, what about "foo/"?
456-
}
457-
458-
goto skipDirAdd // We shouldn't add a blob that isn't a folder as a folder. You either have the folder metadata, or you don't.
459-
}
460-
461-
pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
446+
447+
// Optimization for HNS-enabled (BlobFS) accounts:
448+
// In HNS accounts, all virtual directories ARE actual directories, so we can skip
449+
// the expensive GetProperties calls to check for hdi_isfolder metadata.
450+
// This significantly improves enumeration performance for large folder structures.
451+
if t.isDFS {
452+
// Directly create the folder object without checking properties
462453
folderRelativePath := strings.TrimPrefix(dName, searchPrefix)
463-
464454
storedObject := newStoredObject(
465455
preprocessor,
466456
getObjectNameOnly(dName),
467457
folderRelativePath,
468458
common.EEntityType.Folder(),
469-
pbPropAdapter.LastModified(),
470-
pbPropAdapter.ContentLength(),
471-
pbPropAdapter,
472-
pbPropAdapter,
473-
pbPropAdapter.Metadata,
459+
time.Time{}, // No timestamp available without GetProperties
460+
0, // Folders have zero size
461+
noContentProps,
462+
noBlobProps,
463+
common.Metadata{},
474464
containerName,
475465
)
476-
storedObject.tryUpdateTimestampsFromMetadata(pbPropAdapter.Metadata)
466+
enqueueOutput(storedObject, nil)
467+
enqueuedDirAsOutput = true
468+
} else {
469+
// For non-HNS accounts, we need to check if it's a folder stub
470+
blobClient := containerClient.NewBlobClient(dName)
471+
altNameCheck:
472+
pResp, err := blobClient.GetProperties(t.ctx, nil)
473+
if err == nil {
474+
if !t.doesBlobRepresentAFolder(pResp.Metadata) { // We've picked up on a file *named* the folder, not the folder itself. Does folder/ exist?
475+
if !strings.HasSuffix(dName, "/") {
476+
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
477+
dName += common.AZCOPY_PATH_SEPARATOR_STRING
478+
goto altNameCheck // "foo" is a file, what about "foo/"?
479+
}
477480

478-
if t.s2sPreserveSourceTags {
479-
tResp, err := blobClient.GetTags(t.ctx, nil)
481+
goto skipDirAdd // We shouldn't add a blob that isn't a folder as a folder. You either have the folder metadata, or you don't.
482+
}
480483

481-
if err == nil {
482-
blobTagsMap := common.BlobTags{}
483-
for _, blobTag := range tResp.BlobTagSet {
484-
blobTagsMap[url.QueryEscape(*blobTag.Key)] = url.QueryEscape(*blobTag.Value)
484+
pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
485+
folderRelativePath := strings.TrimPrefix(dName, searchPrefix)
486+
487+
storedObject := newStoredObject(
488+
preprocessor,
489+
getObjectNameOnly(dName),
490+
folderRelativePath,
491+
common.EEntityType.Folder(),
492+
pbPropAdapter.LastModified(),
493+
pbPropAdapter.ContentLength(),
494+
pbPropAdapter,
495+
pbPropAdapter,
496+
pbPropAdapter.Metadata,
497+
containerName,
498+
)
499+
storedObject.tryUpdateTimestampsFromMetadata(pbPropAdapter.Metadata)
500+
501+
if t.s2sPreserveSourceTags {
502+
tResp, err := blobClient.GetTags(t.ctx, nil)
503+
504+
if err == nil {
505+
blobTagsMap := common.BlobTags{}
506+
for _, blobTag := range tResp.BlobTagSet {
507+
blobTagsMap[url.QueryEscape(*blobTag.Key)] = url.QueryEscape(*blobTag.Value)
508+
}
509+
storedObject.blobTags = blobTagsMap
485510
}
486-
storedObject.blobTags = blobTagsMap
487511
}
488-
}
489512

490-
enqueueOutput(storedObject, err)
491-
enqueuedDirAsOutput = true
492-
} else {
493-
// There was nothing there, but is there folder/?
494-
if !strings.HasSuffix(dName, "/") {
495-
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
496-
dName += common.AZCOPY_PATH_SEPARATOR_STRING
497-
goto altNameCheck // "foo" is a file, what about "foo/"?
513+
enqueueOutput(storedObject, err)
514+
enqueuedDirAsOutput = true
515+
} else {
516+
// There was nothing there, but is there folder/?
517+
if !strings.HasSuffix(dName, "/") {
518+
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
519+
dName += common.AZCOPY_PATH_SEPARATOR_STRING
520+
goto altNameCheck // "foo" is a file, what about "foo/"?
521+
}
498522
}
499523
}
500524
skipDirAdd:
@@ -530,8 +554,10 @@ func (t *blobTraverser) parallelList(containerClient *container.Client, containe
530554

531555
// process the blobs returned in this result segment
532556
for _, blobInfo := range lResp.Segment.BlobItems {
533-
// if the blob represents a hdi folder, then skip it
534-
if t.doesBlobRepresentAFolder(blobInfo.Metadata) {
557+
// For HNS-enabled (BlobFS) accounts, folders are represented as virtual directories
558+
// in the BlobPrefixes, not as blobs with hdi_isfolder metadata.
559+
// So we only need to check for folder blobs in non-HNS accounts.
560+
if !t.isDFS && t.doesBlobRepresentAFolder(blobInfo.Metadata) {
535561
continue
536562
}
537563

@@ -684,8 +710,10 @@ func (t *blobTraverser) serialList(containerClient *container.Client, containerN
684710
}
685711
// process the blobs returned in this result segment
686712
for _, blobInfo := range resp.Segment.BlobItems {
687-
// if the blob represents a hdi folder, then skip it
688-
if t.doesBlobRepresentAFolder(blobInfo.Metadata) {
713+
// For HNS-enabled (BlobFS) accounts, folders are represented as virtual directories
714+
// in the BlobPrefixes, not as blobs with hdi_isfolder metadata.
715+
// So we only need to check for folder blobs in non-HNS accounts.
716+
if !t.isDFS && t.doesBlobRepresentAFolder(blobInfo.Metadata) {
689717
continue
690718
}
691719

0 commit comments

Comments
 (0)