open-nomad/client/rpcproxy/rpcproxy_test.go
2016-06-11 18:17:20 -04:00

819 lines
20 KiB
Go

package rpcproxy
import (
"bytes"
"encoding/binary"
"fmt"
"log"
"math/rand"
"net"
"os"
"strings"
"sync/atomic"
"testing"
"time"
)
const (
ipv4len = 4
nodeNameFmt = "s%03d"
defaultNomadPort = "4647"
// Poached from RFC2544 and RFC3330
testingNetworkCidr = "198.18.0.0/15"
testingNetworkUint32 = 3323068416
)
var (
localLogger *log.Logger
localLogBuffer *bytes.Buffer
serverCount uint32
validIp uint32
)
func init() {
localLogBuffer = new(bytes.Buffer)
localLogger = log.New(localLogBuffer, "", 0)
}
func makeServerEndpointName() string {
serverNum := atomic.AddUint32(&serverCount, 1)
validIp := testingNetworkUint32 + serverNum
ipv4 := make(net.IP, ipv4len)
binary.BigEndian.PutUint32(ipv4, validIp)
return net.JoinHostPort(ipv4.String(), defaultNomadPort)
}
func GetBufferedLogger() *log.Logger {
return localLogger
}
type fauxConnPool struct {
// failPct between 0.0 and 1.0 == pct of time a Ping should fail
failPct float64
}
func (cp *fauxConnPool) PingNomadServer(region string, majorVersion int, s *ServerEndpoint) (bool, error) {
var success bool
successProb := rand.Float64()
if successProb > cp.failPct {
success = true
}
return success, nil
}
type fauxSerf struct {
datacenter string
numNodes int
region string
rpcMinorVersion int
rpcMajorVersion int
}
func (s *fauxSerf) NumNodes() int {
return s.numNodes
}
func (s *fauxSerf) Region() string {
return s.region
}
func (s *fauxSerf) Datacenter() string {
return s.datacenter
}
func (s *fauxSerf) RPCMajorVersion() int {
return s.rpcMajorVersion
}
func (s *fauxSerf) RPCMinorVersion() int {
return s.rpcMinorVersion
}
func testRPCProxy() (p *RPCProxy) {
logger := GetBufferedLogger()
logger = log.New(os.Stderr, "", log.LstdFlags)
shutdownCh := make(chan struct{})
p = NewRPCProxy(logger, shutdownCh, &fauxSerf{numNodes: 16384}, &fauxConnPool{})
return p
}
func testRPCProxyFailProb(failPct float64) (p *RPCProxy) {
logger := GetBufferedLogger()
logger = log.New(os.Stderr, "", log.LstdFlags)
shutdownCh := make(chan struct{})
p = NewRPCProxy(logger, shutdownCh, &fauxSerf{}, &fauxConnPool{failPct: failPct})
return p
}
// func (p *RPCProxy) AddPrimaryServer(server *ServerEndpoint) {
func TestRPCProxy_AddPrimaryServer(t *testing.T) {
p := testRPCProxy()
var num int
num = p.NumServers()
if num != 0 {
t.Fatalf("Expected zero servers to start")
}
s1Endpoint := makeServerEndpointName()
s1 := p.AddPrimaryServer(s1Endpoint)
num = p.NumServers()
if num != 1 {
t.Fatalf("Expected one server")
}
if s1 == nil {
t.Fatalf("bad")
}
if s1.Name != s1Endpoint {
t.Fatalf("bad")
}
s1 = p.AddPrimaryServer(s1Endpoint)
num = p.NumServers()
if num != 1 {
t.Fatalf("Expected one server (still)")
}
if s1 == nil {
t.Fatalf("bad")
}
if s1.Name != s1Endpoint {
t.Fatalf("bad")
}
s2Endpoint := makeServerEndpointName()
s2 := p.AddPrimaryServer(s2Endpoint)
num = p.NumServers()
if num != 2 {
t.Fatalf("Expected two servers")
}
if s2 == nil {
t.Fatalf("bad")
}
if s2.Name != s2Endpoint {
t.Fatalf("bad")
}
}
// func (p *RPCProxy) FindServer() (server *ServerEndpoint) {
func TestRPCProxy_FindServer(t *testing.T) {
p := testRPCProxy()
if p.FindServer() != nil {
t.Fatalf("Expected nil return")
}
s1Endpoint := makeServerEndpointName()
p.AddPrimaryServer(s1Endpoint)
if p.NumServers() != 1 {
t.Fatalf("Expected one server")
}
s1 := p.FindServer()
if s1 == nil {
t.Fatalf("Expected non-nil server")
}
if s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server")
}
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server (still)")
}
s2Endpoint := makeServerEndpointName()
p.AddPrimaryServer(s2Endpoint)
if p.NumServers() != 2 {
t.Fatalf("Expected two servers")
}
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server (still)")
}
p.NotifyFailedServer(s1)
s2 := p.FindServer()
if s2 == nil || s2.Name != s2Endpoint {
t.Fatalf("Expected s2 server")
}
p.NotifyFailedServer(s2)
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server")
}
}
// func New(logger *log.Logger, shutdownCh chan struct{}) (p *RPCProxy) {
func TestRPCProxy_New(t *testing.T) {
logger := GetBufferedLogger()
logger = log.New(os.Stderr, "", log.LstdFlags)
shutdownCh := make(chan struct{})
p := NewRPCProxy(logger, shutdownCh, &fauxSerf{}, &fauxConnPool{})
if p == nil {
t.Fatalf("RPCProxy nil")
}
}
// func (p *RPCProxy) NotifyFailedServer(server *ServerEndpoint) {
func TestRPCProxy_NotifyFailedServer(t *testing.T) {
p := testRPCProxy()
if p.NumServers() != 0 {
t.Fatalf("Expected zero servers to start")
}
// Try notifying for a server that is not managed by RPCProxy
s1Endpoint := makeServerEndpointName()
s1 := p.AddPrimaryServer(s1Endpoint)
if s1 == nil {
t.Fatalf("bad")
}
if p.NumServers() != 1 {
t.Fatalf("bad")
}
p.RemoveServer(s1)
if p.NumServers() != 0 {
t.Fatalf("bad")
}
p.NotifyFailedServer(s1)
s1 = p.AddPrimaryServer(s1Endpoint)
// Test again w/ a server not in the list
s2Endpoint := makeServerEndpointName()
s2 := p.AddPrimaryServer(s2Endpoint)
if s2 == nil {
t.Fatalf("bad")
}
if p.NumServers() != 2 {
t.Fatalf("bad")
}
p.RemoveServer(s2)
if p.NumServers() != 1 {
t.Fatalf("bad")
}
p.NotifyFailedServer(s2)
if p.NumServers() != 1 {
t.Fatalf("Expected one server")
}
// Re-add s2 so there are two servers in the RPCProxy server list
s2 = p.AddPrimaryServer(s2Endpoint)
if p.NumServers() != 2 {
t.Fatalf("Expected two servers")
}
// Find the first server, it should be s1
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server")
}
// Notify s2 as failed, s1 should still be first
p.NotifyFailedServer(s2)
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server (still)")
}
// Fail s1, s2 should be first
p.NotifyFailedServer(s1)
s2 = p.FindServer()
if s2 == nil || s2.Name != s2Endpoint {
t.Fatalf("Expected s2 server")
}
// Fail s2, s1 should be first
p.NotifyFailedServer(s2)
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server")
}
}
// func (p *RPCProxy) NumServers() (numServers int) {
func TestRPCProxy_NumServers(t *testing.T) {
p := testRPCProxy()
const maxNumServers = 100
serverList := make([]*ServerEndpoint, 0, maxNumServers)
// Add some servers
for i := 0; i < maxNumServers; i++ {
num := p.NumServers()
if num != i {
t.Fatalf("%d: Expected %d servers", i, num)
}
serverName := makeServerEndpointName()
s := p.AddPrimaryServer(serverName)
if s == nil {
t.Fatalf("Expected server from %+q", serverName)
}
serverList = append(serverList, s)
num = p.NumServers()
if num != i+1 {
t.Fatalf("%d: Expected %d servers", i, num+1)
}
}
// Remove some servers
for i := maxNumServers; i > 0; i-- {
num := p.NumServers()
if num != i {
t.Fatalf("%d: Expected %d servers", i, num)
}
p.RemoveServer(serverList[i-1])
num = p.NumServers()
if num != i-1 {
t.Fatalf("%d: Expected %d servers", i, num-1)
}
}
}
// func (p *RPCProxy) RebalanceServers() {
func TestRPCProxy_RebalanceServers(t *testing.T) {
const failPct = 0.5
p := testRPCProxyFailProb(failPct)
const maxServers = 100
const numShuffleTests = 100
const uniquePassRate = 0.5
// Make a huge list of nodes.
for i := 0; i < maxServers; i++ {
p.AddPrimaryServer(makeServerEndpointName())
}
// Keep track of how many unique shuffles we get.
uniques := make(map[string]struct{}, maxServers)
for i := 0; i < numShuffleTests; i++ {
p.RebalanceServers()
var names []string
for j := 0; j < maxServers; j++ {
server := p.FindServer()
p.NotifyFailedServer(server)
names = append(names, server.Name)
}
key := strings.Join(names, "|")
uniques[key] = struct{}{}
}
// We have to allow for the fact that there won't always be a unique
// shuffle each pass, so we just look for smell here without the test
// being flaky.
if len(uniques) < int(maxServers*uniquePassRate) {
t.Fatalf("unique shuffle ratio too low: %d/%d", len(uniques), maxServers)
}
}
// func (p *RPCProxy) RemoveServer(server *ServerEndpoint) {
func TestRPCProxy_RemoveServer(t *testing.T) {
p := testRPCProxy()
if p.NumServers() != 0 {
t.Fatalf("Expected zero servers to start")
}
// Test removing server before its added
s1Endpoint := makeServerEndpointName()
s1 := p.AddPrimaryServer(s1Endpoint)
if p.NumServers() != 1 {
t.Fatalf("bad")
}
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server: %+q", s1.Name)
}
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 server: %+q", s1.Name)
}
p.RemoveServer(s1)
if p.NumServers() != 0 {
t.Fatalf("bad")
}
// Remove it a second time now that it doesn't exist
p.RemoveServer(s1)
if p.NumServers() != 0 {
t.Fatalf("bad")
}
p.AddPrimaryServer(s1Endpoint)
if p.NumServers() != 1 {
t.Fatalf("bad")
}
s2Endpoint := makeServerEndpointName()
s2 := p.AddPrimaryServer(s2Endpoint)
if p.NumServers() != 2 {
t.Fatalf("bad")
}
if s2 == nil || s2.Name != s2Endpoint {
t.Fatalf("Expected s2 server: %+q", s2.Name)
}
s1 = p.FindServer()
if s1 == nil || s1.Name != s1Endpoint {
t.Fatalf("Expected s1 to be the front of the list: %+q==%+q", s1.Name, s1Endpoint)
}
// Move s1 to the back of the server list
p.NotifyFailedServer(s1)
s2 = p.FindServer()
if s2 == nil || s2.Name != s2Endpoint {
t.Fatalf("Expected s2 server: %+q", s2Endpoint)
}
p.RemoveServer(s2)
if p.NumServers() != 1 {
t.Fatalf("bad")
}
p.RemoveServer(s2)
if p.NumServers() != 1 {
t.Fatalf("bad")
}
p.AddPrimaryServer(s2Endpoint)
const maxServers = 19
servers := make([]*ServerEndpoint, 0, maxServers)
servers = append(servers, s1)
servers = append(servers, s2)
// Already added two servers above
for i := maxServers; i > 2; i-- {
server := p.AddPrimaryServer(makeServerEndpointName())
servers = append(servers, server)
}
if p.NumServers() != maxServers {
t.Fatalf("Expected %d servers, received %d", maxServers, p.NumServers())
}
p.RebalanceServers()
if p.NumServers() != maxServers {
t.Fatalf("Expected %d servers, received %d", maxServers, p.NumServers())
}
findServer := func(server *ServerEndpoint) bool {
for i := p.NumServers(); i > 0; i-- {
s := p.FindServer()
if s == server {
return true
}
}
return false
}
expectedNumServers := maxServers
removedServers := make([]*ServerEndpoint, 0, maxServers)
// Remove servers from the front of the list
for i := 3; i > 0; i-- {
server := p.FindServer()
if server == nil {
t.Fatalf("FindServer returned nil")
}
p.RemoveServer(server)
expectedNumServers--
if p.NumServers() != expectedNumServers {
t.Fatalf("Expected %d servers (got %d)", expectedNumServers, p.NumServers())
}
if findServer(server) == true {
t.Fatalf("Did not expect to find server %s after removal from the front", server.Name)
}
removedServers = append(removedServers, server)
}
// Remove server from the end of the list
for i := 3; i > 0; i-- {
server := p.FindServer()
p.NotifyFailedServer(server)
p.RemoveServer(server)
expectedNumServers--
if p.NumServers() != expectedNumServers {
t.Fatalf("Expected %d servers (got %d)", expectedNumServers, p.NumServers())
}
if findServer(server) == true {
t.Fatalf("Did not expect to find server %s", server.Name)
}
removedServers = append(removedServers, server)
}
// Remove server from the middle of the list
for i := 3; i > 0; i-- {
server := p.FindServer()
p.NotifyFailedServer(server)
server2 := p.FindServer()
p.NotifyFailedServer(server2) // server2 now at end of the list
p.RemoveServer(server)
expectedNumServers--
if p.NumServers() != expectedNumServers {
t.Fatalf("Expected %d servers (got %d)", expectedNumServers, p.NumServers())
}
if findServer(server) == true {
t.Fatalf("Did not expect to find server %s", server.Name)
}
removedServers = append(removedServers, server)
}
if p.NumServers()+len(removedServers) != maxServers {
t.Fatalf("Expected %d+%d=%d servers", p.NumServers(), len(removedServers), maxServers)
}
// Drain the remaining servers from the middle
for i := p.NumServers(); i > 0; i-- {
server := p.FindServer()
p.NotifyFailedServer(server)
server2 := p.FindServer()
p.NotifyFailedServer(server2) // server2 now at end of the list
p.RemoveServer(server)
removedServers = append(removedServers, server)
}
if p.NumServers() != 0 {
t.Fatalf("Expected an empty server list")
}
if len(removedServers) != maxServers {
t.Fatalf("Expected all servers to be in removed server list")
}
}
// func (p *RPCProxy) Start() {
// func (l *serverList) cycleServer() (servers []*Server) {
func TestRPCProxyInternal_cycleServer(t *testing.T) {
p := testRPCProxy()
l := p.getServerList()
server0 := &ServerEndpoint{Name: "server1"}
server1 := &ServerEndpoint{Name: "server2"}
server2 := &ServerEndpoint{Name: "server3"}
l.L = append(l.L, server0, server1, server2)
p.saveServerList(l)
l = p.getServerList()
if len(l.L) != 3 {
t.Fatalf("server length incorrect: %d/3", len(l.L))
}
if l.L[0] != server0 &&
l.L[1] != server1 &&
l.L[2] != server2 {
t.Fatalf("initial server ordering not correct")
}
l.L = l.cycleServer()
if len(l.L) != 3 {
t.Fatalf("server length incorrect: %d/3", len(l.L))
}
if l.L[0] != server1 &&
l.L[1] != server2 &&
l.L[2] != server0 {
t.Fatalf("server ordering after one cycle not correct")
}
l.L = l.cycleServer()
if len(l.L) != 3 {
t.Fatalf("server length incorrect: %d/3", len(l.L))
}
if l.L[0] != server2 &&
l.L[1] != server0 &&
l.L[2] != server1 {
t.Fatalf("server ordering after two cycles not correct")
}
l.L = l.cycleServer()
if len(l.L) != 3 {
t.Fatalf("server length incorrect: %d/3", len(l.L))
}
if l.L[0] != server0 &&
l.L[1] != server1 &&
l.L[2] != server2 {
t.Fatalf("server ordering after three cycles not correct")
}
}
// func (p *RPCProxy) getServerList() serverList {
func TestRPCProxyInternal_getServerList(t *testing.T) {
p := testRPCProxy()
l := p.getServerList()
if l.L == nil {
t.Fatalf("serverList.servers nil")
}
if len(l.L) != 0 {
t.Fatalf("serverList.servers length not zero")
}
}
func TestRPCProxyInternal_New(t *testing.T) {
p := testRPCProxy()
if p == nil {
t.Fatalf("bad")
}
if p.logger == nil {
t.Fatalf("bad")
}
if p.shutdownCh == nil {
t.Fatalf("bad")
}
}
// func (p *RPCProxy) reconcileServerList(l *serverList) bool {
func TestRPCProxyInternal_reconcileServerList(t *testing.T) {
tests := []int{0, 1, 2, 3, 4, 5, 10, 100}
for _, n := range tests {
ok, err := test_reconcileServerList(n)
if !ok {
t.Errorf("Expected %d to pass: %v", n, err)
}
}
}
func test_reconcileServerList(maxServers int) (bool, error) {
// Build a server list, reconcile, verify the missing servers are
// missing, the added have been added, and the original server is
// present.
const failPct = 0.5
p := testRPCProxyFailProb(failPct)
var failedServers, healthyServers []*ServerEndpoint
for i := 0; i < maxServers; i++ {
nodeName := fmt.Sprintf("s%02d", i)
node := &ServerEndpoint{Name: nodeName}
// Add 66% of servers to RPCProxy
if rand.Float64() > 0.33 {
p.activateEndpoint(node)
// Of healthy servers, (ab)use connPoolPinger to
// failPct of the servers for the reconcile. This
// allows for the selected server to no longer be
// healthy for the reconcile below.
if ok, _ := p.connPoolPinger.PingNomadServer(p.configInfo.Region(), p.configInfo.RPCMajorVersion(), node); ok {
// Will still be present
healthyServers = append(healthyServers, node)
} else {
// Will be missing
failedServers = append(failedServers, node)
}
} else {
// Will be added from the call to reconcile
healthyServers = append(healthyServers, node)
}
}
// Randomize RPCProxy's server list
p.RebalanceServers()
selectedServer := p.FindServer()
var selectedServerFailed bool
for _, s := range failedServers {
if selectedServer.Key().Equal(s.Key()) {
selectedServerFailed = true
break
}
}
// Update RPCProxy's server list to be "healthy" based on Serf.
// Reconcile this with origServers, which is shuffled and has a live
// connection, but possibly out of date.
origServers := p.getServerList()
p.saveServerList(serverList{L: healthyServers})
// This should always succeed with non-zero server lists
if !selectedServerFailed && !p.reconcileServerList(&origServers) &&
len(p.getServerList().L) != 0 &&
len(origServers.L) != 0 {
// If the random gods are unfavorable and we end up with zero
// length lists, expect things to fail and retry the test.
return false, fmt.Errorf("Expected reconcile to succeed: %v %d %d",
selectedServerFailed,
len(p.getServerList().L),
len(origServers.L))
}
// If we have zero-length server lists, test succeeded in degenerate
// case.
if len(p.getServerList().L) == 0 &&
len(origServers.L) == 0 {
// Failed as expected w/ zero length list
return true, nil
}
resultingServerMap := make(map[EndpointKey]bool)
for _, s := range p.getServerList().L {
resultingServerMap[*s.Key()] = true
}
// Test to make sure no failed servers are in the RPCProxy's
// list. Error if there are any failedServers in l.servers
for _, s := range failedServers {
_, ok := resultingServerMap[*s.Key()]
if ok {
return false, fmt.Errorf("Found failed server %v in merged list %v", s, resultingServerMap)
}
}
// Test to make sure all healthy servers are in the healthy list.
if len(healthyServers) != len(p.getServerList().L) {
return false, fmt.Errorf("Expected healthy map and servers to match: %d/%d", len(healthyServers), len(healthyServers))
}
// Test to make sure all healthy servers are in the resultingServerMap list.
for _, s := range healthyServers {
_, ok := resultingServerMap[*s.Key()]
if !ok {
return false, fmt.Errorf("Server %v missing from healthy map after merged lists", s)
}
}
return true, nil
}
// func (l *serverList) refreshServerRebalanceTimer() {
func TestRPCProxyInternal_refreshServerRebalanceTimer(t *testing.T) {
type clusterSizes struct {
numNodes int
numServers int
minRebalance time.Duration
}
clusters := []clusterSizes{
{0, 3, 10 * time.Minute},
{1, 0, 10 * time.Minute}, // partitioned cluster
{1, 3, 10 * time.Minute},
{2, 3, 10 * time.Minute},
{100, 0, 10 * time.Minute}, // partitioned
{100, 1, 10 * time.Minute}, // partitioned
{100, 3, 10 * time.Minute},
{1024, 1, 10 * time.Minute}, // partitioned
{1024, 3, 10 * time.Minute}, // partitioned
{1024, 5, 10 * time.Minute},
{16384, 1, 10 * time.Minute}, // partitioned
{16384, 2, 10 * time.Minute}, // partitioned
{16384, 3, 10 * time.Minute}, // partitioned
{16384, 5, 10 * time.Minute},
{65535, 0, 10 * time.Minute}, // partitioned
{65535, 1, 10 * time.Minute}, // partitioned
{65535, 2, 10 * time.Minute}, // partitioned
{65535, 3, 10 * time.Minute}, // partitioned
{65535, 5, 10 * time.Minute}, // partitioned
{65535, 7, 10 * time.Minute},
{1000000, 1, 10 * time.Minute}, // partitioned
{1000000, 2, 10 * time.Minute}, // partitioned
{1000000, 3, 10 * time.Minute}, // partitioned
{1000000, 5, 10 * time.Minute}, // partitioned
{1000000, 11, 10 * time.Minute}, // partitioned
{1000000, 19, 10 * time.Minute},
}
logger := log.New(os.Stderr, "", log.LstdFlags)
shutdownCh := make(chan struct{})
for i, s := range clusters {
p := NewRPCProxy(logger, shutdownCh, &fauxSerf{numNodes: s.numNodes}, &fauxConnPool{})
for i := 0; i < s.numServers; i++ {
nodeName := fmt.Sprintf("s%02d", i)
p.activateEndpoint(&ServerEndpoint{Name: nodeName})
}
d := p.refreshServerRebalanceTimer()
if d < s.minRebalance {
t.Errorf("[%d] duration too short for cluster of size %d and %d servers (%s < %s)", i, s.numNodes, s.numServers, d, s.minRebalance)
}
}
}
// func (p *RPCProxy) saveServerList(l serverList) {
func TestRPCProxyInternal_saveServerList(t *testing.T) {
p := testRPCProxy()
// Initial condition
func() {
l := p.getServerList()
if len(l.L) != 0 {
t.Fatalf("RPCProxy.saveServerList failed to load init config")
}
newServer := new(ServerEndpoint)
l.L = append(l.L, newServer)
p.saveServerList(l)
}()
// Test that save works
func() {
l1 := p.getServerList()
t1NumServers := len(l1.L)
if t1NumServers != 1 {
t.Fatalf("RPCProxy.saveServerList failed to save mutated config")
}
}()
// Verify mutation w/o a save doesn't alter the original
func() {
newServer := new(ServerEndpoint)
l := p.getServerList()
l.L = append(l.L, newServer)
l_orig := p.getServerList()
origNumServers := len(l_orig.L)
if origNumServers >= len(l.L) {
t.Fatalf("RPCProxy.saveServerList unsaved config overwrote original")
}
}()
}