Commit 0dcdcff

Add MemberDowngrade failpoint

Signed-off-by: Siyuan Zhang <[email protected]>

1 parent e0bbea9 · commit 0dcdcff

4 files changed: +126 −0 lines changed

tests/framework/e2e/curl.go (+7)

@@ -128,3 +128,10 @@ func CURLGet(clus *EtcdProcessCluster, req CURLReq) error {
     return SpawnWithExpectsContext(ctx, CURLPrefixArgsCluster(clus.Cfg, clus.Procs[rand.Intn(clus.Cfg.ClusterSize)], "GET", req), nil, req.Expected)
 }
+
+func CURLGetFromMember(clus *EtcdProcessCluster, member EtcdProcess, req CURLReq) error {
+    ctx, cancel := context.WithTimeout(context.Background(), req.timeoutDuration())
+    defer cancel()
+
+    return SpawnWithExpectsContext(ctx, CURLPrefixArgsCluster(clus.Cfg, member, "GET", req), nil, req.Expected)
+}

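The new CURLGetFromMember mirrors CURLGet but queries a specific member instead of a random one, which is what the downgrade failpoint needs when polling a member it has just restarted. A minimal usage sketch, assuming a running e2e.EtcdProcessCluster and the enclosing test's *testing.T; the helper name and the version in the regex are illustrative, not part of this commit:

// Hypothetical helper; assumes the e2e and expect test packages are imported.
func checkMemberVersion(t *testing.T, clus *e2e.EtcdProcessCluster) {
    req := e2e.CURLReq{
        Endpoint: "/version",
        Expected: expect.ExpectedResponse{Value: `"etcdserver":"3\.5\.`, IsRegularExpr: true},
    }
    // Query the first member directly rather than a random one, as verifyVersion below does.
    if err := e2e.CURLGetFromMember(clus, clus.Procs[0], req); err != nil {
        t.Logf("member does not report the expected version yet: %v", err)
    }
}
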
tests/robustness/failpoint/cluster.go (+116)

@@ -23,10 +23,13 @@ import (
     "testing"
     "time"
 
+    "github.com/coreos/go-semver/semver"
     "github.com/stretchr/testify/require"
     "go.uber.org/zap"
 
+    pb "go.etcd.io/etcd/api/v3/etcdserverpb"
     clientv3 "go.etcd.io/etcd/client/v3"
+    "go.etcd.io/etcd/pkg/v3/expect"
     "go.etcd.io/etcd/server/v3/etcdserver"
     "go.etcd.io/etcd/tests/v3/framework/e2e"
     "go.etcd.io/etcd/tests/v3/robustness/identity"
@@ -35,6 +38,7 @@ import (
 )
 
 var MemberReplace Failpoint = memberReplace{}
+var MemberDowngrade Failpoint = memberDowngrade{}
 
 type memberReplace struct{}

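The new MemberDowngrade value is registered alongside MemberReplace and has to satisfy the same Failpoint contract. A sketch of that contract, inferred only from the method signatures added in this diff (the actual interface definition lives elsewhere in tests/robustness/failpoint and is not shown here):

// Inferred shape of the Failpoint interface that memberDowngrade implements below.
type Failpoint interface {
    Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster, baseTime time.Time, ids identity.Provider) ([]report.ClientReport, error)
    Name() string
    Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess, profile traffic.Profile) bool
}
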
@@ -138,6 +142,92 @@ func (f memberReplace) Available(config e2e.EtcdProcessClusterConfig, member e2e
     return config.ClusterSize > 1 && (config.Version == e2e.QuorumLastVersion || member.Config().ExecPath == e2e.BinPath.Etcd)
 }
 
+type memberDowngrade struct{}
+
+func (f memberDowngrade) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster, baseTime time.Time, ids identity.Provider) ([]report.ClientReport, error) {
+    v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd)
+    if err != nil {
+        return nil, err
+    }
+    targetVersion := semver.Version{Major: v.Major, Minor: v.Minor - 1}
+    numberOfMembersToDowngrade := rand.Int()%len(clus.Procs) + 1
+    membersToDowngrade := rand.Perm(len(clus.Procs))[:numberOfMembersToDowngrade]
+    lg.Info("Test downgrading members", zap.Any("members", membersToDowngrade))
+
+    member := clus.Procs[0]
+    endpoints := []string{member.EndpointsGRPC()[0]}
+    cc, err := clientv3.New(clientv3.Config{
+        Endpoints:            endpoints,
+        Logger:               zap.NewNop(),
+        DialKeepAliveTime:    10 * time.Second,
+        DialKeepAliveTimeout: 100 * time.Millisecond,
+    })
+    if err != nil {
+        return nil, err
+    }
+    defer cc.Close()
+
+    // Need to wait health interval for cluster to accept changes
+    time.Sleep(etcdserver.HealthInterval)
+    lg.Info("Enable downgrade")
+    err = enableDowngrade(ctx, cc, &targetVersion)
+    if err != nil {
+        return nil, err
+    }
+    // Need to wait health interval for cluster to prepare for downgrade
+    time.Sleep(etcdserver.HealthInterval)
+
+    for _, memberID := range membersToDowngrade {
+        member = clus.Procs[memberID]
+        lg.Info("Downgrading member", zap.String("member", member.Config().Name))
+        for member.IsRunning() {
+            err = member.Kill()
+            if err != nil {
+                lg.Info("Sending kill signal failed", zap.Error(err))
+            }
+            err = member.Wait(ctx)
+            if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
+                lg.Info("Failed to kill the process", zap.Error(err))
+                return nil, fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err)
+            }
+        }
+        if lazyfs := member.LazyFS(); lazyfs != nil {
+            lg.Info("Removing data that was not fsynced")
+            err := lazyfs.ClearCache(ctx)
+            if err != nil {
+                return nil, err
+            }
+        }
+        member.Config().ExecPath = e2e.BinPath.EtcdLastRelease
+        err = patchArgs(member.Config().Args, "initial-cluster-state", "existing")
+        if err != nil {
+            return nil, err
+        }
+        lg.Info("Restarting member", zap.String("member", member.Config().Name))
+        err = member.Start(ctx)
+        if err != nil {
+            return nil, err
+        }
+        err = verifyVersion(t, clus, member, targetVersion)
+    }
+    time.Sleep(etcdserver.HealthInterval)
+    return nil, err
+}
+
+func (f memberDowngrade) Name() string {
+    return "MemberDowngrade"
+}
+
+func (f memberDowngrade) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess, profile traffic.Profile) bool {
+    v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd)
+    if err != nil {
+        panic("Failed checking etcd version binary")
+    }
+    v3_6 := semver.Version{Major: 3, Minor: 6}
+    // only current version cluster can be downgraded.
+    return config.ClusterSize > 1 && v.Compare(v3_6) >= 0 && (config.Version == e2e.CurrentVersion && member.Config().ExecPath == e2e.BinPath.Etcd)
+}
+
 func getID(ctx context.Context, cc *clientv3.Client, name string) (id uint64, found bool, err error) {
     // Ensure linearized MemberList by first making a linearized Get request from the same member.
     // This is required for v3.4 support as it doesn't support linearized MemberList https://github.com/etcd-io/etcd/issues/18929
@@ -170,3 +260,29 @@ func patchArgs(args []string, flag, newValue string) error {
     }
     return fmt.Errorf("--%s flag not found", flag)
 }
+
+func enableDowngrade(ctx context.Context, cc *clientv3.Client, targetVersion *semver.Version) error {
+    _, err := cc.Maintenance.Downgrade(ctx, clientv3.DowngradeAction(pb.DowngradeRequest_VALIDATE), targetVersion.String())
+    if err != nil {
+        return err
+    }
+    _, err = cc.Maintenance.Downgrade(ctx, clientv3.DowngradeAction(pb.DowngradeRequest_ENABLE), targetVersion.String())
+    return err
+}
+
+func verifyVersion(t *testing.T, clus *e2e.EtcdProcessCluster, member e2e.EtcdProcess, expectedVersion semver.Version) error {
+    var err error
+    expected := fmt.Sprintf(`"etcdserver":"%d.%d\..*"etcdcluster":"%d\.%d\.`, expectedVersion.Major, expectedVersion.Minor, expectedVersion.Major, expectedVersion.Minor)
+    for i := 0; i < 35; i++ {
+        if err = e2e.CURLGetFromMember(clus, member, e2e.CURLReq{Endpoint: "/version", Expected: expect.ExpectedResponse{Value: expected, IsRegularExpr: true}}); err != nil {
+            t.Logf("#%d: v3 is not ready yet (%v)", i, err)
+            time.Sleep(200 * time.Millisecond)
+            continue
+        }
+        break
+    }
+    if err != nil {
+        return fmt.Errorf("failed to verify version, expected %v got (%v)", expected, err)
+    }
+    return nil
+}

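verifyVersion above accepts the downgrade once a member's /version JSON matches a regular expression built from the target major.minor. A self-contained check of that pattern against an illustrative payload (the version numbers are examples, not output captured by this test):

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Illustrative /version response from a member running 3.5; field values are examples.
    payload := `{"etcdserver":"3.5.17","etcdcluster":"3.5.0"}`
    // The same pattern verifyVersion builds for Major=3, Minor=5.
    pattern := `"etcdserver":"3.5\..*"etcdcluster":"3\.5\.`
    ok, err := regexp.MatchString(pattern, payload)
    fmt.Println(ok, err) // true <nil>
}
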
tests/robustness/failpoint/failpoint.go (+1)

@@ -46,6 +46,7 @@ var allFailpoints = []Failpoint{
     RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
     BeforeApplyOneConfChangeSleep,
     MemberReplace,
+    MemberDowngrade,
     DropPeerNetwork,
     RaftBeforeSaveSleep,
     RaftAfterSaveSleep,

tests/robustness/report/wal.go (+2)

@@ -183,6 +183,8 @@ func parseEntryNormal(ent raftpb.Entry) (*model.EtcdRequest, error) {
         return nil, nil
     case raftReq.ClusterVersionSet != nil:
         return nil, nil
+    case raftReq.DowngradeInfoSet != nil:
+        return nil, nil
     case raftReq.Compaction != nil:
         request := model.EtcdRequest{
             Type: model.Compact,
