ff3610850e
The router.Manager is already rebalancing servers for other connection pools, so it can call into our resolver to do the same. This change allows us to remove the serf dependency from resolverBuilder, and remove Datacenter from the config. It also reverts the change to refreshServerRebalanceTimer.
406 lines
11 KiB
Go
406 lines
11 KiB
Go
package router
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math/rand"
|
|
"net"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/agent/metadata"
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// localLogBuffer is the in-memory buffer that GetBufferedLogger points its
// logger output at. It is replaced on every GetBufferedLogger call.
var localLogBuffer *bytes.Buffer
|
|
|
|
func init() {
|
|
localLogBuffer = new(bytes.Buffer)
|
|
}
|
|
|
|
func GetBufferedLogger() hclog.Logger {
|
|
localLogBuffer = new(bytes.Buffer)
|
|
return hclog.New(&hclog.LoggerOptions{
|
|
Level: 0,
|
|
Output: localLogBuffer,
|
|
})
|
|
}
|
|
|
|
// fauxConnPool is a test double for a connection pool whose Ping result is
// decided by a random draw.
type fauxConnPool struct {
	// failPct is the fraction of Ping calls that should fail, expressed
	// as a value between 0.0 and 1.0.
	failPct float64
}

// Ping ignores all of its arguments. It draws one rand.Float64 value and
// reports success only when that value is strictly greater than failPct.
// The returned error is always nil.
func (cp *fauxConnPool) Ping(string, string, net.Addr) (bool, error) {
	return rand.Float64() > cp.failPct, nil
}
|
|
|
|
// fauxSerf is a test double that reports a fixed node count.
type fauxSerf struct {
	numNodes int // value handed back by NumNodes
}

// NumNodes returns the numNodes value the stub was constructed with.
func (s *fauxSerf) NumNodes() int {
	count := s.numNodes
	return count
}
|
|
|
|
func testManager() (m *Manager) {
|
|
logger := GetBufferedLogger()
|
|
shutdownCh := make(chan struct{})
|
|
m = New(logger, shutdownCh, &fauxSerf{numNodes: 16384}, &fauxConnPool{}, "", noopRebalancer)
|
|
return m
|
|
}
|
|
|
|
// noopRebalancer is the do-nothing callback handed to New by the test
// helpers in this file.
func noopRebalancer() {
	// Intentionally empty.
}
|
|
|
|
func testManagerFailProb(failPct float64) (m *Manager) {
|
|
logger := GetBufferedLogger()
|
|
shutdownCh := make(chan struct{})
|
|
m = New(logger, shutdownCh, &fauxSerf{}, &fauxConnPool{failPct: failPct}, "", noopRebalancer)
|
|
return m
|
|
}
|
|
|
|
// func (l *serverList) cycleServer() (servers []*metadata.Server) {
|
|
func TestManagerInternal_cycleServer(t *testing.T) {
|
|
m := testManager()
|
|
l := m.getServerList()
|
|
|
|
server0 := &metadata.Server{Name: "server1"}
|
|
server1 := &metadata.Server{Name: "server2"}
|
|
server2 := &metadata.Server{Name: "server3"}
|
|
l.servers = append(l.servers, server0, server1, server2)
|
|
m.saveServerList(l)
|
|
|
|
l = m.getServerList()
|
|
if len(l.servers) != 3 {
|
|
t.Fatalf("server length incorrect: %d/3", len(l.servers))
|
|
}
|
|
if l.servers[0] != server0 &&
|
|
l.servers[1] != server1 &&
|
|
l.servers[2] != server2 {
|
|
t.Fatalf("initial server ordering not correct")
|
|
}
|
|
|
|
l.servers = l.cycleServer()
|
|
if len(l.servers) != 3 {
|
|
t.Fatalf("server length incorrect: %d/3", len(l.servers))
|
|
}
|
|
if l.servers[0] != server1 &&
|
|
l.servers[1] != server2 &&
|
|
l.servers[2] != server0 {
|
|
t.Fatalf("server ordering after one cycle not correct")
|
|
}
|
|
|
|
l.servers = l.cycleServer()
|
|
if len(l.servers) != 3 {
|
|
t.Fatalf("server length incorrect: %d/3", len(l.servers))
|
|
}
|
|
if l.servers[0] != server2 &&
|
|
l.servers[1] != server0 &&
|
|
l.servers[2] != server1 {
|
|
t.Fatalf("server ordering after two cycles not correct")
|
|
}
|
|
|
|
l.servers = l.cycleServer()
|
|
if len(l.servers) != 3 {
|
|
t.Fatalf("server length incorrect: %d/3", len(l.servers))
|
|
}
|
|
if l.servers[0] != server0 &&
|
|
l.servers[1] != server1 &&
|
|
l.servers[2] != server2 {
|
|
t.Fatalf("server ordering after three cycles not correct")
|
|
}
|
|
}
|
|
|
|
// func (m *Manager) getServerList() serverList {
|
|
func TestManagerInternal_getServerList(t *testing.T) {
|
|
m := testManager()
|
|
l := m.getServerList()
|
|
if l.servers == nil {
|
|
t.Fatalf("serverList.servers nil")
|
|
}
|
|
|
|
if len(l.servers) != 0 {
|
|
t.Fatalf("serverList.servers length not zero")
|
|
}
|
|
}
|
|
|
|
func TestManagerInternal_New(t *testing.T) {
|
|
m := testManager()
|
|
if m == nil {
|
|
t.Fatalf("Manager nil")
|
|
}
|
|
|
|
if m.clusterInfo == nil {
|
|
t.Fatalf("Manager.clusterInfo nil")
|
|
}
|
|
|
|
if m.logger == nil {
|
|
t.Fatalf("Manager.logger nil")
|
|
}
|
|
|
|
if m.shutdownCh == nil {
|
|
t.Fatalf("Manager.shutdownCh nil")
|
|
}
|
|
}
|
|
|
|
// func (m *Manager) reconcileServerList(l *serverList) bool {
|
|
func TestManagerInternal_reconcileServerList(t *testing.T) {
|
|
tests := []int{0, 1, 2, 3, 4, 5, 10, 100}
|
|
for _, n := range tests {
|
|
ok, err := test_reconcileServerList(n)
|
|
if !ok {
|
|
t.Errorf("Expected %d to pass: %v", n, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func test_reconcileServerList(maxServers int) (bool, error) {
|
|
// Build a server list, reconcile, verify the missing servers are
|
|
// missing, the added have been added, and the original server is
|
|
// present.
|
|
const failPct = 0.5
|
|
m := testManagerFailProb(failPct)
|
|
|
|
var failedServers, healthyServers []*metadata.Server
|
|
for i := 0; i < maxServers; i++ {
|
|
nodeName := fmt.Sprintf("s%02d", i)
|
|
|
|
node := &metadata.Server{Name: nodeName}
|
|
// Add 66% of servers to Manager
|
|
if rand.Float64() > 0.33 {
|
|
m.AddServer(node)
|
|
|
|
// Of healthy servers, (ab)use connPoolPinger to
|
|
// failPct of the servers for the reconcile. This
|
|
// allows for the selected server to no longer be
|
|
// healthy for the reconcile below.
|
|
if ok, _ := m.connPoolPinger.Ping(node.Datacenter, node.ShortName, node.Addr); ok {
|
|
// Will still be present
|
|
healthyServers = append(healthyServers, node)
|
|
} else {
|
|
// Will be missing
|
|
failedServers = append(failedServers, node)
|
|
}
|
|
} else {
|
|
// Will be added from the call to reconcile
|
|
healthyServers = append(healthyServers, node)
|
|
}
|
|
}
|
|
|
|
// Randomize Manager's server list
|
|
m.RebalanceServers()
|
|
selectedServer := m.FindServer()
|
|
|
|
var selectedServerFailed bool
|
|
for _, s := range failedServers {
|
|
if selectedServer.Key().Equal(s.Key()) {
|
|
selectedServerFailed = true
|
|
break
|
|
}
|
|
}
|
|
|
|
// Update Manager's server list to be "healthy" based on Serf.
|
|
// Reconcile this with origServers, which is shuffled and has a live
|
|
// connection, but possibly out of date.
|
|
origServers := m.getServerList()
|
|
m.saveServerList(serverList{servers: healthyServers})
|
|
|
|
// This should always succeed with non-zero server lists
|
|
if !selectedServerFailed && !m.reconcileServerList(&origServers) &&
|
|
len(m.getServerList().servers) != 0 &&
|
|
len(origServers.servers) != 0 {
|
|
// If the random gods are unfavorable and we end up with zero
|
|
// length lists, expect things to fail and retry the test.
|
|
return false, fmt.Errorf("Expected reconcile to succeed: %v %d %d",
|
|
selectedServerFailed,
|
|
len(m.getServerList().servers),
|
|
len(origServers.servers))
|
|
}
|
|
|
|
// If we have zero-length server lists, test succeeded in degenerate
|
|
// case.
|
|
if len(m.getServerList().servers) == 0 &&
|
|
len(origServers.servers) == 0 {
|
|
// Failed as expected w/ zero length list
|
|
return true, nil
|
|
}
|
|
|
|
resultingServerMap := make(map[metadata.Key]bool)
|
|
for _, s := range m.getServerList().servers {
|
|
resultingServerMap[*s.Key()] = true
|
|
}
|
|
|
|
// Test to make sure no failed servers are in the Manager's
|
|
// list. Error if there are any failedServers in l.servers
|
|
for _, s := range failedServers {
|
|
_, ok := resultingServerMap[*s.Key()]
|
|
if ok {
|
|
return false, fmt.Errorf("Found failed server %v in merged list %v", s, resultingServerMap)
|
|
}
|
|
}
|
|
|
|
// Test to make sure all healthy servers are in the healthy list.
|
|
if len(healthyServers) != len(m.getServerList().servers) {
|
|
return false, fmt.Errorf("Expected healthy map and servers to match: %d/%d", len(healthyServers), len(healthyServers))
|
|
}
|
|
|
|
// Test to make sure all healthy servers are in the resultingServerMap list.
|
|
for _, s := range healthyServers {
|
|
_, ok := resultingServerMap[*s.Key()]
|
|
if !ok {
|
|
return false, fmt.Errorf("Server %v missing from healthy map after merged lists", s)
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// func (l *serverList) refreshServerRebalanceTimer() {
|
|
func TestManagerInternal_refreshServerRebalanceTimer(t *testing.T) {
|
|
type clusterSizes struct {
|
|
numNodes int
|
|
numServers int
|
|
minRebalance time.Duration
|
|
}
|
|
clusters := []clusterSizes{
|
|
{0, 3, 2 * time.Minute},
|
|
{1, 0, 2 * time.Minute}, // partitioned cluster
|
|
{1, 3, 2 * time.Minute},
|
|
{2, 3, 2 * time.Minute},
|
|
{100, 0, 2 * time.Minute}, // partitioned
|
|
{100, 1, 2 * time.Minute}, // partitioned
|
|
{100, 3, 2 * time.Minute},
|
|
{1024, 1, 2 * time.Minute}, // partitioned
|
|
{1024, 3, 2 * time.Minute}, // partitioned
|
|
{1024, 5, 2 * time.Minute},
|
|
{16384, 1, 4 * time.Minute}, // partitioned
|
|
{16384, 2, 2 * time.Minute}, // partitioned
|
|
{16384, 3, 2 * time.Minute}, // partitioned
|
|
{16384, 5, 2 * time.Minute},
|
|
{65535, 0, 2 * time.Minute}, // partitioned
|
|
{65535, 1, 8 * time.Minute}, // partitioned
|
|
{65535, 2, 3 * time.Minute}, // partitioned
|
|
{65535, 3, 5 * time.Minute}, // partitioned
|
|
{65535, 5, 3 * time.Minute}, // partitioned
|
|
{65535, 7, 2 * time.Minute},
|
|
{1000000, 1, 4 * time.Hour}, // partitioned
|
|
{1000000, 2, 2 * time.Hour}, // partitioned
|
|
{1000000, 3, 80 * time.Minute}, // partitioned
|
|
{1000000, 5, 50 * time.Minute}, // partitioned
|
|
{1000000, 11, 20 * time.Minute}, // partitioned
|
|
{1000000, 19, 10 * time.Minute},
|
|
}
|
|
|
|
logger := GetBufferedLogger()
|
|
shutdownCh := make(chan struct{})
|
|
|
|
for _, s := range clusters {
|
|
m := New(logger, shutdownCh, &fauxSerf{numNodes: s.numNodes}, &fauxConnPool{}, "", noopRebalancer)
|
|
for i := 0; i < s.numServers; i++ {
|
|
nodeName := fmt.Sprintf("s%02d", i)
|
|
m.AddServer(&metadata.Server{Name: nodeName})
|
|
}
|
|
|
|
d := m.refreshServerRebalanceTimer()
|
|
if d < s.minRebalance {
|
|
t.Errorf("duration too short for cluster of size %d and %d servers (%s < %s)", s.numNodes, s.numServers, d, s.minRebalance)
|
|
}
|
|
}
|
|
}
|
|
|
|
// func (m *Manager) saveServerList(l serverList) {
|
|
func TestManagerInternal_saveServerList(t *testing.T) {
|
|
m := testManager()
|
|
|
|
// Initial condition
|
|
func() {
|
|
l := m.getServerList()
|
|
if len(l.servers) != 0 {
|
|
t.Fatalf("Manager.saveServerList failed to load init config")
|
|
}
|
|
|
|
newServer := new(metadata.Server)
|
|
l.servers = append(l.servers, newServer)
|
|
m.saveServerList(l)
|
|
}()
|
|
|
|
// Test that save works
|
|
func() {
|
|
l1 := m.getServerList()
|
|
t1NumServers := len(l1.servers)
|
|
if t1NumServers != 1 {
|
|
t.Fatalf("Manager.saveServerList failed to save mutated config")
|
|
}
|
|
}()
|
|
|
|
// Verify mutation w/o a save doesn't alter the original
|
|
func() {
|
|
newServer := new(metadata.Server)
|
|
l := m.getServerList()
|
|
l.servers = append(l.servers, newServer)
|
|
|
|
l_orig := m.getServerList()
|
|
origNumServers := len(l_orig.servers)
|
|
if origNumServers >= len(l.servers) {
|
|
t.Fatalf("Manager.saveServerList unsaved config overwrote original")
|
|
}
|
|
}()
|
|
}
|
|
|
|
func TestManager_healthyServer(t *testing.T) {
|
|
t.Run("checking itself", func(t *testing.T) {
|
|
m := testManager()
|
|
m.serverName = "s1"
|
|
server := metadata.Server{Name: m.serverName}
|
|
require.True(t, m.healthyServer(&server))
|
|
})
|
|
t.Run("checking another server with successful ping", func(t *testing.T) {
|
|
m := testManager()
|
|
server := metadata.Server{Name: "s1"}
|
|
require.True(t, m.healthyServer(&server))
|
|
})
|
|
t.Run("checking another server with failed ping", func(t *testing.T) {
|
|
m := testManagerFailProb(1)
|
|
server := metadata.Server{Name: "s1"}
|
|
require.False(t, m.healthyServer(&server))
|
|
})
|
|
}
|
|
|
|
func TestManager_Rebalance(t *testing.T) {
|
|
t.Run("single server cluster checking itself", func(t *testing.T) {
|
|
m := testManager()
|
|
m.serverName = "s1"
|
|
m.AddServer(&metadata.Server{Name: m.serverName})
|
|
m.RebalanceServers()
|
|
require.False(t, m.IsOffline())
|
|
})
|
|
t.Run("multi server cluster is unhealthy when pings always fail", func(t *testing.T) {
|
|
m := testManagerFailProb(1)
|
|
m.AddServer(&metadata.Server{Name: "s1"})
|
|
m.AddServer(&metadata.Server{Name: "s2"})
|
|
m.AddServer(&metadata.Server{Name: "s3"})
|
|
for i := 0; i < 100; i++ {
|
|
m.RebalanceServers()
|
|
require.True(t, m.IsOffline())
|
|
}
|
|
})
|
|
t.Run("multi server cluster checking itself remains healthy despite pings always fail", func(t *testing.T) {
|
|
m := testManagerFailProb(1)
|
|
m.serverName = "s1"
|
|
m.AddServer(&metadata.Server{Name: m.serverName})
|
|
m.AddServer(&metadata.Server{Name: "s2"})
|
|
m.AddServer(&metadata.Server{Name: "s3"})
|
|
for i := 0; i < 100; i++ {
|
|
m.RebalanceServers()
|
|
require.False(t, m.IsOffline())
|
|
}
|
|
})
|
|
}
|