Skip to content

Commit d52bd90

Browse files
authored
Merge pull request #19461 from fuweid/fix-downgrade-issue
deflake: TestDowngradeCancellationAfterDowngrading1InClusterOf3
2 parents ac7d3e9 + 2de17bd commit d52bd90

File tree

4 files changed

+49
-14
lines changed

4 files changed

+49
-14
lines changed

tests/e2e/cluster_downgrade_test.go

+4-3
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
174174
t.Logf("Elect members for operations on members: %v", membersToChange)
175175

176176
t.Logf("Starting downgrade process to %q", lastVersionStr)
177-
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, currentVersion, lastClusterVersion)
177+
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, true, currentVersion, lastClusterVersion)
178178
require.NoError(t, err)
179179
if len(membersToChange) == len(epc.Procs) {
180180
e2e.AssertProcessLogs(t, epc.Procs[epc.WaitLeader(t)], "the cluster has been downgraded")
@@ -210,11 +210,12 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
210210
beforeMembers, beforeKV = getMembersAndKeys(t, cc)
211211

212212
t.Logf("Starting upgrade process to %q", currentVersionStr)
213-
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, lastClusterVersion, currentVersion)
213+
downgradeEnabled := triggerCancellation == noCancellation && numberOfMembersToDowngrade < clusterSize
214+
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, downgradeEnabled, lastClusterVersion, currentVersion)
214215
require.NoError(t, err)
215216
t.Log("Upgrade complete")
216217

217-
if triggerCancellation == noCancellation && numberOfMembersToDowngrade < clusterSize {
218+
if downgradeEnabled {
218219
t.Log("Downgrade should be still enabled")
219220
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
220221
} else {

tests/framework/e2e/cluster.go

+16
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ import (
2828
"testing"
2929
"time"
3030

31+
"github.com/coreos/go-semver/semver"
3132
"go.uber.org/zap"
3233
"go.uber.org/zap/zaptest"
3334

@@ -711,6 +712,21 @@ func (cfg *EtcdProcessClusterConfig) binaryPath(i int) string {
711712
return execPath
712713
}
713714

715+
// MinServerVersion returns the lowest version among the binaries of all
// processes in the cluster. The version of each member is obtained by calling
// GetVersionFromBinary on the ExecPath from that member's config.
//
// It returns an error, wrapping the underlying one and naming the member, as
// soon as any binary's version cannot be determined. If the cluster has no
// processes, the result is (nil, nil).
func (epc *EtcdProcessCluster) MinServerVersion() (*semver.Version, error) {
716+
var minVersion *semver.Version
717+
for _, member := range epc.Procs {
718+
ver, err := GetVersionFromBinary(member.Config().ExecPath)
719+
if err != nil {
720+
return nil, fmt.Errorf("failed to get version from member %s binary: %w", member.Config().Name, err)
721+
}
722+
723+
// The first member seeds the minimum; afterwards keep the smaller version.
if minVersion == nil || ver.LessThan(*minVersion) {
724+
minVersion = ver
725+
}
726+
}
727+
return minVersion, nil
728+
}
729+
714730
func values(cfg embed.Config) map[string]string {
715731
fs := flag.NewFlagSet("etcd", flag.ContinueOnError)
716732
cfg.AddFlags(fs)

tests/framework/e2e/downgrade.go

+26-8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929

3030
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
3131
"go.etcd.io/etcd/api/v3/version"
32+
"go.etcd.io/etcd/server/v3/etcdserver"
3233
"go.etcd.io/etcd/tests/v3/framework/testutils"
3334
)
3435

@@ -127,14 +128,14 @@ func ValidateDowngradeInfo(t *testing.T, clus *EtcdProcessCluster, expected *pb.
127128
}
128129
}
129130

130-
// DowngradeUpgradeMembers selects numberOfMembersToChange members at random
// (the leading entries of a random permutation of the indexes of clus.Procs),
// logs the selection, and delegates to DowngradeUpgradeMembersByID, passing
// all remaining arguments through unchanged.
func DowngradeUpgradeMembers(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, numberOfMembersToChange int, currentVersion, targetVersion *semver.Version) error {
131+
func DowngradeUpgradeMembers(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, numberOfMembersToChange int, downgradeEnabled bool, currentVersion, targetVersion *semver.Version) error {
131132
membersToChange := rand.Perm(len(clus.Procs))[:numberOfMembersToChange]
132133
t.Logf("Elect members for operations on members: %v", membersToChange)
133134

134-
return DowngradeUpgradeMembersByID(t, lg, clus, membersToChange, currentVersion, targetVersion)
135+
return DowngradeUpgradeMembersByID(t, lg, clus, membersToChange, downgradeEnabled, currentVersion, targetVersion)
135136
}
136137

137-
func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, membersToChange []int, currentVersion, targetVersion *semver.Version) error {
138+
func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, membersToChange []int, downgradeEnabled bool, currentVersion, targetVersion *semver.Version) error {
138139
if lg == nil {
139140
lg = clus.lg
140141
}
@@ -162,15 +163,32 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
162163
}
163164
}
164165

165-
clusterVersion := targetVersion.String()
166-
if !isDowngrade && len(membersToChange) != len(clus.Procs) {
167-
clusterVersion = currentVersion.String()
168-
}
166+
t.Log("Waiting health interval to make sure the leader propagates version to new processes")
167+
time.Sleep(etcdserver.HealthInterval)
168+
169169
lg.Info("Validating versions")
170+
clusterVersion := targetVersion
171+
if !isDowngrade {
172+
if downgradeEnabled {
173+
// If the downgrade isn't cancelled yet, then the cluster
174+
// version will always stay at the lower version, no matter
175+
// what the binary version of each member is.
176+
clusterVersion = currentVersion
177+
} else {
178+
// If the downgrade has already been cancelled, then the
179+
// cluster version is the minimal server version.
180+
minVer, err := clus.MinServerVersion()
181+
if err != nil {
182+
return fmt.Errorf("failed to get min server version: %w", err)
183+
}
184+
clusterVersion = minVer
185+
}
186+
}
187+
170188
for _, memberID := range membersToChange {
171189
member := clus.Procs[memberID]
172190
ValidateVersion(t, clus.Cfg, member, version.Versions{
173-
Cluster: clusterVersion,
191+
Cluster: clusterVersion.String(),
174192
Server: targetVersion.String(),
175193
})
176194
}

tests/robustness/failpoint/cluster.go

+3-3
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ func (f memberDowngrade) Inject(ctx context.Context, t *testing.T, lg *zap.Logge
174174
time.Sleep(etcdserver.HealthInterval)
175175
e2e.DowngradeEnable(t, clus, lastVersion)
176176

177-
err = e2e.DowngradeUpgradeMembers(t, lg, clus, numberOfMembersToDowngrade, currentVersion, lastVersion)
177+
err = e2e.DowngradeUpgradeMembers(t, lg, clus, numberOfMembersToDowngrade, true, currentVersion, lastVersion)
178178
time.Sleep(etcdserver.HealthInterval)
179179
return nil, err
180180
}
@@ -228,7 +228,7 @@ func (f memberDowngradeUpgrade) Inject(ctx context.Context, t *testing.T, lg *za
228228

229229
e2e.DowngradeEnable(t, clus, lastVersion)
230230
// downgrade all members first
231-
err = e2e.DowngradeUpgradeMembers(t, lg, clus, len(clus.Procs), currentVersion, lastVersion)
231+
err = e2e.DowngradeUpgradeMembers(t, lg, clus, len(clus.Procs), true, currentVersion, lastVersion)
232232
if err != nil {
233233
return nil, err
234234
}
@@ -241,7 +241,7 @@ func (f memberDowngradeUpgrade) Inject(ctx context.Context, t *testing.T, lg *za
241241

242242
// partial upgrade the cluster
243243
numberOfMembersToUpgrade := rand.Int()%len(clus.Procs) + 1
244-
err = e2e.DowngradeUpgradeMembers(t, lg, clus, numberOfMembersToUpgrade, lastVersion, currentVersion)
244+
err = e2e.DowngradeUpgradeMembers(t, lg, clus, numberOfMembersToUpgrade, false, lastVersion, currentVersion)
245245
time.Sleep(etcdserver.HealthInterval)
246246
return nil, err
247247
}

0 commit comments

Comments
 (0)