Reorganize version check logic for autopilot
This commit is contained in:
parent
22620d5e73
commit
0924d5a2de
|
@ -34,7 +34,7 @@ func (s *Server) stopAutopilot() {
|
|||
s.autopilotWaitGroup.Wait()
|
||||
}
|
||||
|
||||
var minAutopilotVersion, _ = version.NewVersion("0.8.0")
|
||||
var minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
||||
|
||||
// autopilotLoop periodically looks for nonvoting servers to promote and dead servers to remove.
|
||||
func (s *Server) autopilotLoop() {
|
||||
|
@ -49,66 +49,39 @@ func (s *Server) autopilotLoop() {
|
|||
case <-s.autopilotShutdownCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
state := s.fsm.State()
|
||||
_, autopilotConf, err := state.AutopilotConfig()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: error retrieving config from state store: %s", err)
|
||||
break
|
||||
}
|
||||
|
||||
// Setup autopilot config if we need to
|
||||
if autopilotConf == nil {
|
||||
if err := s.initializeAutopilot(); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: %v", err)
|
||||
}
|
||||
|
||||
autopilotConfig, ok := s.getAutopilotConfig()
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := s.autopilotPolicy.PromoteNonVoters(autopilotConf); err != nil {
|
||||
if err := s.autopilotPolicy.PromoteNonVoters(autopilotConfig); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: error checking for non-voters to promote: %s", err)
|
||||
}
|
||||
|
||||
if err := s.pruneDeadServers(); err != nil {
|
||||
if err := s.pruneDeadServers(autopilotConfig); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: error checking for dead servers to remove: %s", err)
|
||||
}
|
||||
case <-s.autopilotRemoveDeadCh:
|
||||
if err := s.pruneDeadServers(); err != nil {
|
||||
autopilotConfig, ok := s.getAutopilotConfig()
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := s.pruneDeadServers(autopilotConfig); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: error checking for dead servers to remove: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lowestServerVersion returns the lowest version among the alive servers
|
||||
func (s *Server) lowestServerVersion() *version.Version {
|
||||
lowest := minAutopilotVersion
|
||||
|
||||
for _, member := range s.LANMembers() {
|
||||
if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive {
|
||||
if parts.Build.LessThan(lowest) {
|
||||
lowest = &parts.Build
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lowest
|
||||
}
|
||||
|
||||
// pruneDeadServers removes up to numPeers/2 failed servers
|
||||
func (s *Server) pruneDeadServers() error {
|
||||
state := s.fsm.State()
|
||||
_, autopilotConf, err := state.AutopilotConfig()
|
||||
if err != nil || autopilotConf == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Server) pruneDeadServers(autopilotConfig *structs.AutopilotConfig) error {
|
||||
// Find any failed servers
|
||||
var failed []string
|
||||
staleRaftServers := make(map[string]raft.Server)
|
||||
if autopilotConf.CleanupDeadServers {
|
||||
if autopilotConfig.CleanupDeadServers {
|
||||
future := s.raft.GetConfiguration()
|
||||
if future.Error() != nil {
|
||||
if err := future.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -182,7 +155,7 @@ type BasicAutopilot struct {
|
|||
}
|
||||
|
||||
// PromoteNonVoters promotes eligible non-voting servers to voters.
|
||||
func (b *BasicAutopilot) PromoteNonVoters(autopilotConf *structs.AutopilotConfig) error {
|
||||
func (b *BasicAutopilot) PromoteNonVoters(autopilotConfig *structs.AutopilotConfig) error {
|
||||
minRaftProtocol, err := ServerMinRaftProtocol(b.server.LANMembers())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting server raft protocol versions: %s", err)
|
||||
|
@ -205,7 +178,7 @@ func (b *BasicAutopilot) PromoteNonVoters(autopilotConf *structs.AutopilotConfig
|
|||
// If this server has been stable and passing for long enough, promote it to a voter
|
||||
if !isVoter(server.Suffrage) {
|
||||
health := b.server.getServerHealth(string(server.ID))
|
||||
if health.IsStable(time.Now(), autopilotConf) {
|
||||
if health.IsStable(time.Now(), autopilotConfig) {
|
||||
promotions = append(promotions, server)
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -154,9 +154,7 @@ func (s *Server) establishLeadership() error {
|
|||
}
|
||||
|
||||
// Setup autopilot config if we need to
|
||||
if err := s.initializeAutopilot(); err != nil {
|
||||
s.logger.Printf("[ERR] autopilot: %v", err)
|
||||
}
|
||||
s.getAutopilotConfig()
|
||||
|
||||
s.startAutopilot()
|
||||
|
||||
|
@ -248,33 +246,31 @@ func (s *Server) initializeACL() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// initializeAutopilot is used to setup the autopilot config if we are
|
||||
// the leader and need to do this
|
||||
func (s *Server) initializeAutopilot() error {
|
||||
lowestVersion := s.lowestServerVersion()
|
||||
|
||||
if !lowestVersion.Equal(minAutopilotVersion) && !lowestVersion.GreaterThan(minAutopilotVersion) {
|
||||
return fmt.Errorf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String())
|
||||
}
|
||||
|
||||
// Bail if the config has already been initialized
|
||||
// getAutopilotConfig is used to get the autopilot config, initializing it if necessary
|
||||
func (s *Server) getAutopilotConfig() (*structs.AutopilotConfig, bool) {
|
||||
state := s.fsm.State()
|
||||
_, config, err := state.AutopilotConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get autopilot config: %v", err)
|
||||
s.logger.Printf("failed to get autopilot config: %v", err)
|
||||
return nil, false
|
||||
}
|
||||
if config != nil {
|
||||
return nil
|
||||
return config, true
|
||||
}
|
||||
|
||||
req := structs.AutopilotSetConfigRequest{
|
||||
Config: *s.config.AutopilotConfig,
|
||||
if !ServersMeetMinimumVersion(s.LANMembers(), minAutopilotVersion) {
|
||||
s.logger.Printf("can't initialize autopilot until all servers are >= %s", minAutopilotVersion.String())
|
||||
return nil, false
|
||||
}
|
||||
|
||||
config = s.config.AutopilotConfig
|
||||
req := structs.AutopilotSetConfigRequest{Config: *config}
|
||||
if _, err = s.raftApply(structs.AutopilotRequestType, req); err != nil {
|
||||
return fmt.Errorf("failed to initialize autopilot config")
|
||||
s.logger.Printf("failed to initialize autopilot config")
|
||||
return nil, false
|
||||
}
|
||||
|
||||
return nil
|
||||
return config, true
|
||||
}
|
||||
|
||||
// reconcile is used to reconcile the differences between Serf
|
||||
|
|
|
@ -7,6 +7,8 @@ import (
|
|||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/hashicorp/consul/consul/agent"
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
|
@ -295,3 +297,17 @@ func runtimeStats() map[string]string {
|
|||
"cpu_count": strconv.FormatInt(int64(runtime.NumCPU()), 10),
|
||||
}
|
||||
}
|
||||
|
||||
// ServersMeetMinimumVersion returns whether the given alive servers are at least on the
|
||||
// given Consul version
|
||||
func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Version) bool {
|
||||
for _, member := range members {
|
||||
if valid, parts := agent.IsConsulServer(member); valid && parts.Status == serf.StatusAlive {
|
||||
if parts.Build.LessThan(minVersion) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
Loading…
Reference in New Issue