Saves the cycled server list after a failed ping when rebalancing. (#3662)

Fixes #3463
This commit is contained in:
James Phillips 2017-11-07 18:13:23 -08:00 committed by GitHub
parent e2367de05d
commit 9de2d8921f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 6 deletions

View File

@ -315,8 +315,7 @@ func (m *Manager) RebalanceServers() {
break
}
m.logger.Printf(`[DEBUG] manager: pinging server "%s" failed: %s`, srv, err)
l.cycleServer()
l.servers = l.cycleServer()
}
// If no healthy servers were found, sleep and wait for Serf to make

View File

@ -13,17 +13,38 @@ import (
"github.com/hashicorp/consul/agent/router"
)
type fauxConnPool struct {
// failPct between 0.0 and 1.0 == pct of time a Ping should fail
failPct float64
type fauxAddr struct {
addr string
}
func (cp *fauxConnPool) Ping(string, net.Addr, int, bool) (bool, error) {
func (a *fauxAddr) Network() string {
return "faux"
}
func (a *fauxAddr) String() string {
return a.addr
}
type fauxConnPool struct {
// failPct between 0.0 and 1.0 == pct of time a Ping should fail.
failPct float64
// failAddr fail whenever we see this address.
failAddr net.Addr
}
func (cp *fauxConnPool) Ping(dc string, addr net.Addr, version int, useTLS bool) (bool, error) {
var success bool
successProb := rand.Float64()
if successProb > cp.failPct {
success = true
}
if cp.failAddr != nil && addr.String() == cp.failAddr.String() {
success = false
}
return success, nil
}
@ -48,6 +69,13 @@ func testManagerFailProb(failPct float64) (m *router.Manager) {
return m
}
func testManagerFailAddr(failAddr net.Addr) (m *router.Manager) {
logger := log.New(os.Stderr, "", log.LstdFlags)
shutdownCh := make(chan struct{})
m = router.New(logger, shutdownCh, &fauxSerf{}, &fauxConnPool{failAddr: failAddr})
return m
}
// func (m *Manager) AddServer(server *metadata.Server) {
func TestServers_AddServer(t *testing.T) {
m := testManager()
@ -282,6 +310,31 @@ func TestServers_RebalanceServers(t *testing.T) {
}
}
func TestServers_RebalanceServers_AvoidFailed(t *testing.T) {
// Do a large number of rebalances with one failed server in the
// list and make sure we never have that one selected afterwards.
// This was added when fixing #3463, when we were just doing the
// shuffle and not actually cycling servers. We do a large number
// of trials with a small number of servers to try to make sure
// the shuffle alone won't give the right answer.
servers := []*metadata.Server{
&metadata.Server{Name: "s1", Addr: &fauxAddr{"s1"}},
&metadata.Server{Name: "s2", Addr: &fauxAddr{"s2"}},
&metadata.Server{Name: "s3", Addr: &fauxAddr{"s3"}},
}
for i := 0; i < 100; i++ {
m := testManagerFailAddr(&fauxAddr{"s2"})
for _, s := range servers {
m.AddServer(s)
}
m.RebalanceServers()
if front := m.FindServer().Name; front == "s2" {
t.Fatalf("should have avoided the failed server")
}
}
}
// func (m *Manager) RemoveServer(server *metadata.Server) {
func TestManager_RemoveServer(t *testing.T) {
const nodeNameFmt = "s%02d"