Skip to content

Commit 65159a2

Browse files
committed
*: Update cases related to Downgrade
1. Update DowngradeUpgradeMembersByID: if it's a downgrading process, the desired version of the cluster should be the target one. If it's an upgrading process, the desired version of the cluster should be determined by the minimum binary version of the members. 2. Remove AssertProcessLogs from DowngradeEnable. The log message "The server is ready to downgrade" appears only when the storage version monitor detects a mismatch between the cluster and storage versions. If traffic is insufficient to trigger a commit, or if an auto-commit occurs right after reading the storage version, the monitor may fail to update it, leading to errors like: ```bash "msg":"failed to update storage version","cluster-version":"3.6.0", "error":"cannot detect storage schema version: missing confstate information" ``` Given this, we should remove the AssertProcessLogs statement. Similar to #19313 Signed-off-by: Wei Fu <[email protected]>
1 parent 091b6ed commit 65159a2

File tree

3 files changed

+80
-25
lines changed

3 files changed

+80
-25
lines changed

tests/e2e/cluster_downgrade_test.go

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/stretchr/testify/assert"
2626
"github.com/stretchr/testify/require"
2727

28+
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
2829
"go.etcd.io/etcd/api/v3/version"
2930
"go.etcd.io/etcd/client/pkg/v3/fileutil"
3031
"go.etcd.io/etcd/client/pkg/v3/types"
@@ -51,6 +52,10 @@ func TestDowngradeUpgradeClusterOf1(t *testing.T) {
5152
testDowngradeUpgrade(t, 1, 1, false, noCancellation)
5253
}
5354

55+
func TestDowngradeUpgrade2InClusterOf3(t *testing.T) {
56+
testDowngradeUpgrade(t, 2, 3, false, noCancellation)
57+
}
58+
5459
func TestDowngradeUpgradeClusterOf3(t *testing.T) {
5560
testDowngradeUpgrade(t, 3, 3, false, noCancellation)
5661
}
@@ -128,6 +133,9 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
128133
time.Sleep(etcdserver.HealthInterval)
129134
}
130135

136+
t.Log("Downgrade should be disabled")
137+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})
138+
131139
t.Log("Adding member to test membership, but a learner avoid breaking quorum")
132140
resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
133141
require.NoError(t, err)
@@ -150,6 +158,10 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
150158
return // No need to perform downgrading, end the test here
151159
}
152160
e2e.DowngradeEnable(t, epc, lastVersion)
161+
162+
t.Log("Downgrade should be enabled")
163+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
164+
153165
if triggerCancellation == cancelRightAfterEnable {
154166
t.Logf("Cancelling downgrade right after enabling (no node is downgraded yet)")
155167
e2e.DowngradeCancel(t, epc)
@@ -165,7 +177,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
165177
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, currentVersion, lastClusterVersion)
166178
require.NoError(t, err)
167179
if len(membersToChange) == len(epc.Procs) {
168-
e2e.AssertProcessLogs(t, leader(t, epc), "the cluster has been downgraded")
180+
e2e.AssertProcessLogs(t, epc.Procs[epc.WaitLeader(t)], "the cluster has been downgraded")
169181
}
170182

171183
t.Log("Downgrade complete")
@@ -202,6 +214,14 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
202214
require.NoError(t, err)
203215
t.Log("Upgrade complete")
204216

217+
if triggerCancellation == noCancellation && numberOfMembersToDowngrade < clusterSize {
218+
t.Log("Downgrade should be still enabled")
219+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
220+
} else {
221+
t.Log("Downgrade should be disabled")
222+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})
223+
}
224+
205225
afterMembers, afterKV = getMembersAndKeys(t, cc)
206226
assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
207227
assert.Equal(t, beforeMembers.Members, afterMembers.Members)
@@ -224,27 +244,6 @@ func newCluster(t *testing.T, clusterSize int, snapshotCount uint64) *e2e.EtcdPr
224244
return epc
225245
}
226246

227-
func leader(t *testing.T, epc *e2e.EtcdProcessCluster) e2e.EtcdProcess {
228-
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
229-
defer cancel()
230-
for i := 0; i < len(epc.Procs); i++ {
231-
endpoints := epc.Procs[i].EndpointsGRPC()
232-
cli, err := clientv3.New(clientv3.Config{
233-
Endpoints: endpoints,
234-
DialTimeout: 3 * time.Second,
235-
})
236-
require.NoError(t, err)
237-
defer cli.Close()
238-
resp, err := cli.Status(ctx, endpoints[0])
239-
require.NoError(t, err)
240-
if resp.Header.GetMemberId() == resp.Leader {
241-
return epc.Procs[i]
242-
}
243-
}
244-
t.Fatal("Leader not found")
245-
return nil
246-
}
247-
248247
func generateSnapshot(t *testing.T, snapshotCount uint64, cc *e2e.EtcdctlV3) {
249248
ctx, cancel := context.WithCancel(context.Background())
250249
defer cancel()

tests/framework/e2e/downgrade.go

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/stretchr/testify/require"
2828
"go.uber.org/zap"
2929

30+
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
3031
"go.etcd.io/etcd/api/v3/version"
3132
"go.etcd.io/etcd/tests/v3/framework/testutils"
3233
)
@@ -46,7 +47,6 @@ func DowngradeEnable(t *testing.T, epc *EtcdProcessCluster, ver *semver.Version)
4647
Server: OffsetMinor(ver, 1).String(),
4748
Storage: ver.String(),
4849
})
49-
AssertProcessLogs(t, epc.Procs[i], "The server is ready to downgrade")
5050
}
5151

5252
t.Log("Cluster is ready for downgrade")
@@ -82,6 +82,51 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
8282
t.Log("Cluster downgrade cancellation is completed")
8383
}
8484

85+
func ValidateDowngradeInfo(t *testing.T, clus *EtcdProcessCluster, expected *pb.DowngradeInfo) {
86+
cfg := clus.Cfg
87+
88+
for i := 0; i < len(clus.Procs); i++ {
89+
member := clus.Procs[i]
90+
mc := member.Etcdctl()
91+
mName := member.Config().Name
92+
93+
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
94+
for {
95+
statuses, err := mc.Status(context.Background())
96+
if err != nil {
97+
cfg.Logger.Warn("failed to get member status and retrying",
98+
zap.Error(err),
99+
zap.String("member", mName))
100+
101+
time.Sleep(time.Second)
102+
continue
103+
}
104+
105+
require.Lenf(t, statuses, 1, "member %s", mName)
106+
got := (*pb.StatusResponse)(statuses[0]).GetDowngradeInfo()
107+
108+
if got.GetEnabled() == expected.GetEnabled() && got.GetTargetVersion() == expected.GetTargetVersion() {
109+
cfg.Logger.Info("DowngradeInfo match", zap.String("member", mName))
110+
break
111+
}
112+
113+
cfg.Logger.Warn("DowngradeInfo didn't match retrying",
114+
zap.String("member", mName),
115+
zap.Dict("expected",
116+
zap.Bool("Enabled", expected.GetEnabled()),
117+
zap.String("TargetVersion", expected.GetTargetVersion()),
118+
),
119+
zap.Dict("got",
120+
zap.Bool("Enabled", got.GetEnabled()),
121+
zap.String("TargetVersion", got.GetTargetVersion()),
122+
),
123+
)
124+
time.Sleep(time.Second)
125+
}
126+
})
127+
}
128+
}
129+
85130
func DowngradeUpgradeMembers(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, numberOfMembersToChange int, currentVersion, targetVersion *semver.Version) error {
86131
membersToChange := rand.Perm(len(clus.Procs))[:numberOfMembersToChange]
87132
t.Logf("Elect members for operations on members: %v", membersToChange)
@@ -100,7 +145,6 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
100145
opString = "downgrading"
101146
newExecPath = BinPath.EtcdLastRelease
102147
}
103-
104148
for _, memberID := range membersToChange {
105149
member := clus.Procs[memberID]
106150
if member.Config().ExecPath == newExecPath {
@@ -117,11 +161,16 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
117161
return err
118162
}
119163
}
164+
165+
clusterVersion := targetVersion.String()
166+
if !isDowngrade && len(membersToChange) != len(clus.Procs) {
167+
clusterVersion = currentVersion.String()
168+
}
120169
lg.Info("Validating versions")
121170
for _, memberID := range membersToChange {
122171
member := clus.Procs[memberID]
123172
ValidateVersion(t, clus.Cfg, member, version.Versions{
124-
Cluster: targetVersion.String(),
173+
Cluster: clusterVersion,
125174
Server: targetVersion.String(),
126175
})
127176
}

tests/robustness/failpoint/cluster.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,13 @@ func (f memberDowngradeUpgrade) Inject(ctx context.Context, t *testing.T, lg *za
232232
if err != nil {
233233
return nil, err
234234
}
235+
236+
// NOTE: By default, the leader can cancel the downgrade once all members
237+
// have reached the target version. However, determining the final stable
238+
// cluster version after an upgrade can be challenging. To ensure stability,
239+
// we should wait for leader to cancel downgrade process.
240+
e2e.AssertProcessLogs(t, clus.Procs[clus.WaitLeader(t)], "the cluster has been downgraded")
241+
235242
// partial upgrade the cluster
236243
numberOfMembersToUpgrade := rand.Int()%len(clus.Procs) + 1
237244
err = e2e.DowngradeUpgradeMembers(t, lg, clus, numberOfMembersToUpgrade, lastVersion, currentVersion)

0 commit comments

Comments
 (0)