Add configurable cleanup of dead servers when a new server joins
This commit is contained in:
parent
2dcfd9115d
commit
2c9001a389
|
@ -274,6 +274,10 @@ type Config struct {
|
|||
// This period is meant to be long enough for a leader election to take
|
||||
// place, and a small jitter is applied to avoid a thundering herd.
|
||||
RPCHoldTimeout time.Duration
|
||||
|
||||
// AutopilotServerCleanup controls whether to remove dead servers when a new
|
||||
// server is added to the Raft peers
|
||||
AutopilotServerCleanup bool
|
||||
}
|
||||
|
||||
// CheckVersion is used to check if the ProtocolVersion is valid
|
||||
|
@ -346,6 +350,8 @@ func DefaultConfig() *Config {
|
|||
RPCHoldTimeout: 7 * time.Second,
|
||||
|
||||
TLSMinVersion: "tls10",
|
||||
|
||||
AutopilotServerCleanup: true,
|
||||
}
|
||||
|
||||
// Increase our reap interval to 3 days instead of 24h.
|
||||
|
|
|
@ -567,6 +567,20 @@ func (s *Server) joinConsulServer(m serf.Member, parts *agent.Server) error {
|
|||
s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Look for dead servers to clean up
|
||||
if s.config.AutopilotServerCleanup {
|
||||
for _, member := range s.serfLAN.Members() {
|
||||
valid, _ := agent.IsConsulServer(member)
|
||||
if valid && member.Name != m.Name && member.Status == serf.StatusFailed {
|
||||
if err := s.handleDeregisterMember("Removing failed server", member); err != nil {
|
||||
return fmt.Errorf("[ERROR] consul: Couldn't deregister failed server (%s): %v", member.Name, err)
|
||||
}
|
||||
s.logger.Printf("[INFO] consul: Removed failed server: %v", member.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -622,3 +622,72 @@ func TestLeader_ReapTombstones(t *testing.T) {
|
|||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestLeader_DeadServerCleanup(t *testing.T) {
|
||||
dir1, s1 := testServerDCExpect(t, "dc1", 3)
|
||||
defer os.RemoveAll(dir1)
|
||||
defer s1.Shutdown()
|
||||
|
||||
dir2, s2 := testServerDCExpect(t, "dc1", 3)
|
||||
defer os.RemoveAll(dir2)
|
||||
defer s2.Shutdown()
|
||||
|
||||
dir3, s3 := testServerDCExpect(t, "dc1", 3)
|
||||
defer os.RemoveAll(dir3)
|
||||
defer s3.Shutdown()
|
||||
|
||||
servers := []*Server{s1, s2, s3}
|
||||
|
||||
// Try to join
|
||||
addr := fmt.Sprintf("127.0.0.1:%d",
|
||||
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
||||
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
for _, s := range servers {
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
peers, _ := s.numPeers()
|
||||
return peers == 3, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("should have 3 peers")
|
||||
})
|
||||
}
|
||||
|
||||
// Kill a non-leader server (s2 or s3, so s4 can still join s1)
|
||||
var nonLeader *Server
|
||||
var removedIndex int
|
||||
for i, s := range servers {
|
||||
if !s.IsLeader() && i > 0 {
|
||||
nonLeader = s
|
||||
removedIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
nonLeader.Shutdown()
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
// Bring up and join a new server
|
||||
dir4, s4 := testServerDCExpect(t, "dc1", 3)
|
||||
defer os.RemoveAll(dir4)
|
||||
defer s4.Shutdown()
|
||||
|
||||
if _, err := s4.JoinLAN([]string{addr}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the dead server is removed and we're back to 3 total peers
|
||||
servers[removedIndex] = s4
|
||||
for _, s := range servers {
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
peers, _ := s.numPeers()
|
||||
return peers == 3, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("should have 3 peers")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -317,6 +317,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
|
|||
conf.Tags["vsn"] = fmt.Sprintf("%d", s.config.ProtocolVersion)
|
||||
conf.Tags["vsn_min"] = fmt.Sprintf("%d", ProtocolVersionMin)
|
||||
conf.Tags["vsn_max"] = fmt.Sprintf("%d", ProtocolVersionMax)
|
||||
conf.Tags["raft_vsn"] = fmt.Sprintf("%d", s.config.RaftConfig.ProtocolVersion)
|
||||
conf.Tags["build"] = s.config.Build
|
||||
conf.Tags["port"] = fmt.Sprintf("%d", addr.Port)
|
||||
if s.config.Bootstrap {
|
||||
|
|
Loading…
Reference in a new issue