diff --git a/etcdmain/etcd.go b/etcdmain/etcd.go index 73328a73d52..98f95e28bbb 100644 --- a/etcdmain/etcd.go +++ b/etcdmain/etcd.go @@ -127,7 +127,7 @@ func startEtcdOrProxyV2() { var stopped <-chan struct{} var errc <-chan error - + var lerrc <-chan error which := identifyDataDirOrDie(cfg.ec.GetLogger(), cfg.ec.Dir) if which != dirEmpty { if lg != nil { @@ -141,7 +141,7 @@ func startEtcdOrProxyV2() { } switch which { case dirMember: - stopped, errc, err = startEtcd(&cfg.ec) + stopped, errc, lerrc, err = startEtcd(&cfg.ec) case dirProxy: err = startProxy(cfg) default: @@ -157,7 +157,7 @@ func startEtcdOrProxyV2() { } else { shouldProxy := cfg.isProxy() if !shouldProxy { - stopped, errc, err = startEtcd(&cfg.ec) + stopped, errc, lerrc, err = startEtcd(&cfg.ec) if derr, ok := err.(*etcdserver.DiscoveryError); ok && derr.Err == v2discovery.ErrFullCluster { if cfg.shouldFallbackToProxy() { if lg != nil { @@ -284,7 +284,21 @@ func startEtcdOrProxyV2() { notifySystemd(lg) select { - case lerr := <-errc: + case err := <- errc: + if strings.Contains(err.Error(), etcdserver.ErrMemberRemoved.Error()) { + tombstoneFile := filepath.Join(cfg.ec.Dir, "tombstone") + if err := ioutil.WriteFile(tombstoneFile, []byte{}, 0600); err != nil { + if lg != nil { + lg.Fatal( + "failed to write tombstone file", + zap.String("tombstone-file", tombstoneFile), + ) + } else { + plog.Fatalf("failed to write tombstone file %s", tombstoneFile) + } + } + } + case lerr := <-lerrc: // fatal out on listener errors if lg != nil { lg.Fatal("listener failed", zap.Error(lerr)) @@ -298,17 +312,18 @@ func startEtcdOrProxyV2() { } // startEtcd runs StartEtcd in addition to hooks needed for standalone etcd. -func startEtcd(cfg *embed.Config) (<-chan struct{}, <-chan error, error) { +func startEtcd(cfg *embed.Config) (<-chan struct{}, <-chan error, <-chan error, error) { e, err := embed.StartEtcd(cfg) if err != nil { - return nil, nil, err + return nil, nil, nil, err } osutil.RegisterInterruptHandler(e.Close) select { case <-e.Server.ReadyNotify(): // wait for e.Server to join the cluster + case <-e.Server.ErrNotify(): // publish aborted errc channel case <-e.Server.StopNotify(): // publish aborted from 'ErrStopped' } - return e.Server.StopNotify(), e.Err(), nil + return e.Server.StopNotify(), e.Server.ErrNotify(), e.Err(), nil } // startProxy launches an HTTP proxy for client communication which proxies to other etcd nodes. diff --git a/etcdserver/errors.go b/etcdserver/errors.go index d0fe28970d1..73e89014d5b 100644 --- a/etcdserver/errors.go +++ b/etcdserver/errors.go @@ -39,6 +39,7 @@ var ( ErrKeyNotFound = errors.New("etcdserver: key not found") ErrCorrupt = errors.New("etcdserver: corrupt cluster") ErrBadLeaderTransferee = errors.New("etcdserver: bad leader transferee") + ErrMemberRemoved = errors.New("the member has been permanently removed from the cluster") ) type DiscoveryError struct { diff --git a/etcdserver/server.go b/etcdserver/server.go index a341625dccb..cd8d3a5bd29 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -24,6 +24,7 @@ import ( "net/http" "os" "path" + "path/filepath" "regexp" "sync" "sync/atomic" @@ -105,6 +106,7 @@ var ( plog = capnslog.NewPackageLogger("go.etcd.io/etcd", "etcdserver") storeMemberAttributeRegexp = regexp.MustCompile(path.Join(membership.StoreMembersPrefix, "[[:xdigit:]]{1,16}", "attributes")) + ) func init() { @@ -1388,7 +1390,7 @@ func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *apply) { } var shouldstop bool if ep.appliedt, ep.appliedi, shouldstop = s.apply(ents, &ep.confState); shouldstop { - go s.stopWithDelay(10*100*time.Millisecond, fmt.Errorf("the member has been permanently removed from the cluster")) + go s.stopWithDelay(10*100*time.Millisecond, ErrMemberRemoved) } } @@ -1551,6 +1553,8 @@ func (s *EtcdServer) stopWithDelay(d time.Duration, err error) { // when the server is stopped. func (s *EtcdServer) StopNotify() <-chan struct{} { return s.done } +func (s *EtcdServer) ErrNotify() <-chan error { return s.errorc } + func (s *EtcdServer) SelfStats() []byte { return s.stats.JSON() } func (s *EtcdServer) LeaderStats() []byte { @@ -2670,6 +2674,15 @@ func (s *EtcdServer) Logger() *zap.Logger { // IsLearner returns if the local member is raft learner func (s *EtcdServer) IsLearner() bool { + tombstoneFile := filepath.Join(s.Cfg.DataDir, "tombstone") + if _, err := os.Stat(tombstoneFile); err == nil { + if lg := s.getLogger(); lg != nil { + lg.Warn("this server has been removed from the cluster, to rejoin please restart the server") + } else { + plog.Warning("this server has been removed from the cluster, to rejoin please restart the server") + } + return false + } return s.cluster.IsLocalMemberLearner() }