Skip to content

Commit

Permalink
Fallback to LMT on missing source hash (#2866)
Browse files Browse the repository at this point in the history
  • Loading branch information
gapra-msft authored Jan 16, 2025
1 parent 903f41c commit 74ff288
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 12 deletions.
37 changes: 25 additions & 12 deletions cmd/syncComparator.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ import (
)

// Reason and status strings handed to syncComparatorLog, which takes a file name, a status
// (skipped / overwritten) and a reason explaining that status.
//
// NOTE(review): this listing comes from a flattened diff hunk (-28,14 +28,15) with the +/- markers
// stripped, so every name appears twice. The first eight entries appear to be the pre-change list
// and the following nine the post-change list — confirm against the real file before relying on it.
const (
// Listing 1 (appears to be the pre-change side of the hunk).
syncSkipReasonTime = "the source has an older LMT than the destination"
syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5"
syncSkipReasonSameHash = "the source has the same hash"
syncOverwriteReasonNewerHash = "the source has a differing hash"
syncOverwriteReasonNewerLMT = "the source is more recent than the destination"
syncStatusSkipped = "skipped"
syncStatusOverwritten = "overwritten"
// Only present in the first listing.
syncOverwriteReasonDeleteDestinationFile = "the flag delete-destination-file is set to true"
// Listing 2 (appears to be the post-change side of the hunk).
syncSkipReasonTime = "the source has an older LMT than the destination"
// Used when the source has no MD5 and the LMT fallback decides to skip.
syncSkipReasonTimeAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and has an older LMT than the destination"
syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5"
syncSkipReasonSameHash = "the source has the same hash"
syncOverwriteReasonNewerHash = "the source has a differing hash"
syncOverwriteReasonNewerLMT = "the source is more recent than the destination"
// Used when the source has no MD5 and the LMT fallback decides to transfer.
syncOverwriteReasonNewerLMTAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and is more recent than the destination"
syncStatusSkipped = "skipped"
syncStatusOverwritten = "overwritten"
)

func syncComparatorLog(fileName, status, skipReason string, stdout bool) {
Expand Down Expand Up @@ -98,8 +99,14 @@ func (f *syncDestinationComparator) processIfNecessary(destinationObject StoredO
switch f.comparisonHashType {
case common.ESyncHashType.MD5():
if sourceObjectInMap.md5 == nil {
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true)
return nil
if sourceObjectInMap.isMoreRecentThan(destinationObject, f.preferSMBTime) {
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false)
return f.copyTransferScheduler(sourceObjectInMap)
} else {
// skip if dest is more recent
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false)
return nil
}
}

if !reflect.DeepEqual(sourceObjectInMap.md5, destinationObject.md5) {
Expand Down Expand Up @@ -177,8 +184,14 @@ func (f *syncSourceComparator) processIfNecessary(sourceObject StoredObject) err
switch f.comparisonHashType {
case common.ESyncHashType.MD5():
if sourceObject.md5 == nil {
syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true)
return nil
if sourceObject.isMoreRecentThan(destinationObjectInMap, f.preferSMBTime) {
syncComparatorLog(sourceObject.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false)
return f.copyTransferScheduler(sourceObject)
} else {
// skip if dest is more recent
syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false)
return nil
}
}

if !reflect.DeepEqual(sourceObject.md5, destinationObjectInMap.md5) {
Expand Down
176 changes: 176 additions & 0 deletions e2etest/zt_newe2e_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,179 @@ func (s *SyncTestSuite) Scenario_TestSyncDeleteDestinationIfNecessary(svm *Scena
},
}, true)
}

// Scenario_TestSyncHashTypeSourceHash runs a sync with MD5 hash comparison from a Blob source in
// which some objects have had their MD5 removed, against a Blob or Local destination.
//
// For local sources, the hash is computed by a hashProcessor created in zc_traverser_local, so a
// local source can never be missing its hash. As such this scenario only covers remote sources.
func (s *SyncTestSuite) Scenario_TestSyncHashTypeSourceHash(svm *ScenarioVariationManager) {
	// One object per case:
	//  1. source has a hash and it equals the destination's -> skip
	//  2. source has a hash and it differs                  -> overwrite
	//  3. source has no hash, src LMT after dest LMT        -> overwrite
	//  4. source has no hash, src LMT before dest LMT       -> skip
	const (
		objHashEqual       = "hashequal"
		objHashNotEqual    = "hashnotequal"
		objNoHashSrcNewer  = "nohashdestsrc"
		objNoHashDestNewer = "nohashsrcdest"
	)

	// Object contents shared between the source and destination definitions.
	equalContent := NewRandomObjectContentContainer(512)
	differingContent := NewRandomObjectContentContainer(512)
	srcNewerContent := NewRandomObjectContentContainer(512)
	destNewerContent := NewRandomObjectContentContainer(512)

	zeroContent := NewZeroObjectContentContainer(512)

	// The destination is created first so that, by default, every source object is the newer one.
	destLocation := ResolveVariation(svm, []common.Location{common.ELocation.Blob(), common.ELocation.Local()})
	destObjs := ObjectResourceMappingFlat{
		objHashEqual:       ResourceDefinitionObject{Body: equalContent},
		objHashNotEqual:    ResourceDefinitionObject{Body: zeroContent},
		objNoHashSrcNewer:  ResourceDefinitionObject{Body: srcNewerContent},
		objNoHashDestNewer: ResourceDefinitionObject{Body: zeroContent},
	}
	dest := CreateResource[ContainerResourceManager](
		svm,
		GetRootResource(svm, destLocation),
		ResourceDefinitionContainer{Objects: destObjs},
	)

	// Leave a gap so the source LMTs are clearly after the destination LMTs.
	time.Sleep(10 * time.Second)

	srcObjs := ObjectResourceMappingFlat{
		objHashEqual:       ResourceDefinitionObject{Body: equalContent},
		objHashNotEqual:    ResourceDefinitionObject{Body: differingContent},
		objNoHashSrcNewer:  ResourceDefinitionObject{Body: srcNewerContent},
		objNoHashDestNewer: ResourceDefinitionObject{Body: destNewerContent},
	}
	src := CreateResource[ContainerResourceManager](
		svm,
		GetRootResource(svm, common.ELocation.Blob()),
		ResourceDefinitionContainer{Objects: srcObjs},
	)

	// The MD5 has to be cleared by hand on the two "no hash" source objects.
	for _, name := range []string{objNoHashSrcNewer, objNoHashDestNewer} {
		src.GetObject(svm, name, common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil})
	}

	// Leave another gap so the object re-created below is clearly newer than its source.
	time.Sleep(10 * time.Second)

	// Re-create the case-4 destination object so that the src LMT is before the dest LMT. It is
	// given the source's content, so the final source/destination comparison still holds even
	// though this object is skipped.
	dest.GetObject(svm, objNoHashDestNewer, common.EEntityType.File()).Create(svm, destNewerContent, ObjectProperties{})

	syncFlags := SyncFlags{
		CopySyncCommonFlags: CopySyncCommonFlags{
			Recursive: pointerTo(true),
		},
		CompareHash: pointerTo(common.ESyncHashType.MD5()),
		// Hidden-file hash storage is OS agnostic (ADO does not support xattr, so the Linux run
		// fails without this).
		LocalHashStorageMode: pointerTo(common.EHashStorageMode.HiddenFiles()),
	}
	stdOut, _ := RunAzCopy(svm, AzCopyCommand{
		Verb:    AzCopyVerbSync,
		Targets: []ResourceManager{src, dest},
		Flags:   syncFlags,
	})

	// After the sync, the destination should match the source for every object.
	ValidateResource[ContainerResourceManager](svm, dest, ResourceDefinitionContainer{Objects: srcObjs}, true)

	// Only the paths that were not skipped (cases 2 and 3) should appear in the plan file.
	expectedTransfers := map[PlanFilePath]PlanFileObject{
		PlanFilePath{SrcPath: "/" + objHashNotEqual, DstPath: "/" + objHashNotEqual}: {
			Properties: ObjectProperties{},
		},
		PlanFilePath{SrcPath: "/" + objNoHashSrcNewer, DstPath: "/" + objNoHashSrcNewer}: {
			Properties: ObjectProperties{},
		},
	}
	ValidatePlanFiles(svm, stdOut, ExpectedPlanFile{Objects: expectedTransfers})
}

// Scenario_TestSyncHashTypeDestinationHash runs a sync with MD5 hash comparison from a Blob or
// Local source against a Blob destination in which some objects have had their MD5 removed.
//
// For local destinations, the hash is computed by a hashProcessor created in zc_traverser_local, so
// a local destination can never be missing its hash. As such this scenario only covers remote
// destinations.
func (s *SyncTestSuite) Scenario_TestSyncHashTypeDestinationHash(svm *ScenarioVariationManager) {
	// One object per case:
	//  1. destination has a hash and it equals the source's -> skip
	//  2. destination has a hash and it differs             -> overwrite
	//  3. destination has no hash, src LMT after dest LMT   -> overwrite
	//  4. destination has no hash, src LMT before dest LMT  -> overwrite
	const (
		objHashEqual       = "hashequal"
		objHashNotEqual    = "hashnotequal"
		objNoHashSrcNewer  = "nohashdestsrc"
		objNoHashDestNewer = "nohashsrcdest"
	)

	// Object contents; everything except the hash-equal object starts out zeroed at the destination.
	equalContent := NewRandomObjectContentContainer(512)
	differingContent := NewRandomObjectContentContainer(512)
	srcNewerContent := NewRandomObjectContentContainer(512)
	destNewerContent := NewRandomObjectContentContainer(512)

	zeroContent := NewZeroObjectContentContainer(512)

	// The destination is created first so that, by default, every source object is the newer one.
	destObjs := ObjectResourceMappingFlat{
		objHashEqual:       ResourceDefinitionObject{Body: equalContent},
		objHashNotEqual:    ResourceDefinitionObject{Body: zeroContent},
		objNoHashSrcNewer:  ResourceDefinitionObject{Body: zeroContent},
		objNoHashDestNewer: ResourceDefinitionObject{Body: zeroContent},
	}
	dest := CreateResource[ContainerResourceManager](
		svm,
		GetRootResource(svm, common.ELocation.Blob()),
		ResourceDefinitionContainer{Objects: destObjs},
	)

	// Leave a gap so the source LMTs are clearly after the destination LMTs.
	time.Sleep(10 * time.Second)

	srcObjs := ObjectResourceMappingFlat{
		objHashEqual:       ResourceDefinitionObject{Body: equalContent},
		objHashNotEqual:    ResourceDefinitionObject{Body: differingContent},
		objNoHashSrcNewer:  ResourceDefinitionObject{Body: srcNewerContent},
		objNoHashDestNewer: ResourceDefinitionObject{Body: destNewerContent},
	}
	srcLocation := ResolveVariation(svm, []common.Location{common.ELocation.Blob(), common.ELocation.Local()})
	src := CreateResource[ContainerResourceManager](
		svm,
		GetRootResource(svm, srcLocation),
		ResourceDefinitionContainer{Objects: srcObjs},
	)

	// The MD5 has to be cleared by hand on the two "no hash" destination objects.
	for _, name := range []string{objNoHashSrcNewer, objNoHashDestNewer} {
		dest.GetObject(svm, name, common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil})
	}

	// Leave another gap so the object re-created below is clearly newer than its source.
	time.Sleep(10 * time.Second)

	// Re-create the case-4 destination object so that the src LMT is before the dest LMT.
	// NOTE(review): this re-create runs after the MD5 was cleared above — confirm that Create with
	// empty ObjectProperties does not put a hash back on the object.
	dest.GetObject(svm, objNoHashDestNewer, common.EEntityType.File()).Create(svm, zeroContent, ObjectProperties{})

	// NOTE(review): unlike the source-hash scenario, no LocalHashStorageMode is set here even
	// though the source may be Local — confirm that is intentional.
	syncFlags := SyncFlags{
		CopySyncCommonFlags: CopySyncCommonFlags{
			Recursive: pointerTo(true),
		},
		CompareHash: pointerTo(common.ESyncHashType.MD5()),
	}
	stdOut, _ := RunAzCopy(svm, AzCopyCommand{
		Verb:    AzCopyVerbSync,
		Targets: []ResourceManager{src, dest},
		Flags:   syncFlags,
	})

	// After the sync, the destination should match the source for every object.
	ValidateResource[ContainerResourceManager](svm, dest, ResourceDefinitionContainer{Objects: srcObjs}, true)

	// Only the paths that were not skipped (cases 2, 3 and 4) should appear in the plan file.
	expectedTransfers := map[PlanFilePath]PlanFileObject{
		PlanFilePath{SrcPath: "/" + objHashNotEqual, DstPath: "/" + objHashNotEqual}: {
			Properties: ObjectProperties{},
		},
		PlanFilePath{SrcPath: "/" + objNoHashSrcNewer, DstPath: "/" + objNoHashSrcNewer}: {
			Properties: ObjectProperties{},
		},
		PlanFilePath{SrcPath: "/" + objNoHashDestNewer, DstPath: "/" + objNoHashDestNewer}: {
			Properties: ObjectProperties{},
		},
	}
	ValidatePlanFiles(svm, stdOut, ExpectedPlanFile{Objects: expectedTransfers})
}

0 comments on commit 74ff288

Please sign in to comment.