autopilot: include only servers from the same region (#15290)
When we migrated to the updated autopilot library in Nomad 1.4.0, the interface for finding servers changed. Previously autopilot would get the serf members and call `IsServer` on each of them, leaving it up to the implementor to filter out clients (and in Nomad's case, other regions). But in the "new" autopilot library, the equivalent interface is `KnownServers` for which we did not filter by region. This causes spurious attempts for the cross-region stats fetching, which results in TLS errors and a lot of log noise. Filter the member set by region to fix the regression.
This commit is contained in:
parent
510eb435dc
commit
d0f9e887f7
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
autopilot: Fixed a bug where autopilot would try to fetch raft stats from other regions
|
||||
```
|
|
@ -195,7 +195,7 @@ func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server {
|
|||
s.logger.Warn("Error parsing server info", "name", member.Name, "error", err)
|
||||
continue
|
||||
} else if srv == nil {
|
||||
// this member was a client
|
||||
// this member was a client or in another region
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -210,6 +210,9 @@ func (s *Server) autopilotServer(m serf.Member) (*autopilot.Server, error) {
|
|||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
if srv.Region != s.Region() {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return s.autopilotServerFromMetadata(srv)
|
||||
}
|
||||
|
|
|
@ -190,6 +190,42 @@ func TestAutopilot_RollingUpdate(t *testing.T) {
|
|||
waitForStableLeadership(t, servers)
|
||||
}
|
||||
|
||||
func TestAutopilot_MultiRegion(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
conf := func(c *Config) {
|
||||
c.NumSchedulers = 0 // reduces test log noise
|
||||
c.BootstrapExpect = 3
|
||||
}
|
||||
s1, cleanupS1 := TestServer(t, conf)
|
||||
defer cleanupS1()
|
||||
|
||||
s2, cleanupS2 := TestServer(t, conf)
|
||||
defer cleanupS2()
|
||||
|
||||
s3, cleanupS3 := TestServer(t, conf)
|
||||
defer cleanupS3()
|
||||
|
||||
// federated regions should not be considered raft peers or show up in the
|
||||
// known servers list
|
||||
s4, cleanupS4 := TestServer(t, func(c *Config) {
|
||||
c.BootstrapExpect = 0
|
||||
c.Region = "other"
|
||||
})
|
||||
defer cleanupS4()
|
||||
|
||||
servers := []*Server{s1, s2, s3}
|
||||
TestJoin(t, s1, s2, s3, s4)
|
||||
|
||||
t.Logf("waiting for initial stable cluster")
|
||||
waitForStableLeadership(t, servers)
|
||||
|
||||
apDelegate := &AutopilotDelegate{s3}
|
||||
known := apDelegate.KnownServers()
|
||||
must.Eq(t, 3, len(known))
|
||||
|
||||
}
|
||||
|
||||
func TestAutopilot_CleanupStaleRaftServer(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
|
|
Loading…
Reference in New Issue