f3b63514d5
NotifyShutdown was only used for testing. Now that t.Cleanup exists, tests can register cleanup with it instead of attaching cleanup to the Server shutdown. The Autopilot test that used NotifyShutdown does not need the notification because Shutdown is synchronous: waiting for the call to return is equivalent.
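For context, a minimal sketch of the pattern the commit message describes, reusing the testServerWithConfig helper that appears in the file below; the wiring here is illustrative only, not the actual change:

	// Sketch only: register cleanup with the testing package instead of
	// relying on a NotifyShutdown channel from the Server. Because Shutdown
	// is synchronous, everything is torn down once this callback returns.
	dir, srv := testServerWithConfig(t, nil)
	t.Cleanup(func() {
		srv.Shutdown()
		os.RemoveAll(dir)
	})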
488 lines · 11 KiB · Go
package consul

import (
	"os"
	"testing"
	"time"

	"github.com/hashicorp/consul/sdk/testutil/retry"
	"github.com/hashicorp/consul/testrpc"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
	"github.com/stretchr/testify/require"
)

// TestAutopilot_IdempotentShutdown verifies that repeated calls to autopilot
// Start and Stop are safe.
func TestAutopilot_IdempotentShutdown(t *testing.T) {
	dir1, s1 := testServerWithConfig(t, nil)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	retry.Run(t, func(r *retry.R) { r.Check(waitForLeader(s1)) })

	s1.autopilot.Start()
	s1.autopilot.Start()
	s1.autopilot.Start()
	s1.autopilot.Stop()
	s1.autopilot.Stop()
	s1.autopilot.Stop()
}

func TestAutopilot_CleanupDeadServer(t *testing.T) {
	t.Parallel()
	for i := 1; i <= 3; i++ {
		testCleanupDeadServer(t, i)
	}
}

func testCleanupDeadServer(t *testing.T, raftVersion int) {
	dc := "dc1"
	conf := func(c *Config) {
		c.Datacenter = dc
		c.Bootstrap = false
		c.BootstrapExpect = 5
		c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(raftVersion)
	}
	dir1, s1 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	dir4, s4 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()

	dir5, s5 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir5)
	defer s5.Shutdown()

	servers := []*Server{s1, s2, s3, s4, s5}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)
	joinLAN(t, s4, s1)
	joinLAN(t, s5, s1)

	for _, s := range servers {
		testrpc.WaitForLeader(t, s.RPC, dc)
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 5)) })
	}

	require := require.New(t)
	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	leaderIndex := -1
	for i, s := range servers {
		if s.IsLeader() {
			leaderIndex = i
			break
		}
	}
	require.NotEqual(leaderIndex, -1)

	// Shutdown two non-leader servers
	killed := make(map[string]struct{})
	for i, s := range servers {
		if i != leaderIndex {
			s.Shutdown()
			killed[string(s.config.NodeID)] = struct{}{}
		}
		if len(killed) == 2 {
			break
		}
	}

	retry.Run(t, func(r *retry.R) {
		alive := 0
		for _, m := range servers[leaderIndex].LANMembers() {
			if m.Status == serf.StatusAlive {
				alive++
			}
		}
		if alive != 3 {
			r.Fatalf("Expected three alive servers instead of %d", alive)
		}
	})

	// Make sure the dead servers are removed and we're back to 3 total peers
	for _, s := range servers {
		_, killed := killed[string(s.config.NodeID)]
		if !killed {
			retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
		}
	}
}

func TestAutopilot_CleanupDeadNonvoter(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Have s2 join and then shut it down immediately before it gets a chance to
	// be promoted to a voter.
	joinLAN(t, s2, s1)
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft([]*Server{s1, s2}))
	})
	s2.Shutdown()

	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft([]*Server{s1}))
	})
}

func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = true
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	conf := func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = false
	}

	dir2, s2 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	dir4, s4 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()

	dir5, s5 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir5)
	defer s5.Shutdown()

	servers := []*Server{s1, s2, s3, s4, s5}

	// Join the servers to s1, and wait until they are all promoted to
	// voters.
	for _, s := range servers[1:] {
		joinLAN(t, s, s1)
	}
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 5))
		}
	})

	// Kill a non-leader server
	s4.Shutdown()

	// Should be removed from the peers automatically
	servers = []*Server{s1, s2, s3, s5}
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 4))
		}
	})
}

func TestAutopilot_RollingUpdate(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = true
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	conf := func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = false
	}

	dir2, s2 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	// Join the servers to s1, and wait until they are all promoted to
	// voters.
	servers := []*Server{s1, s2, s3}
	for _, s := range servers[1:] {
		joinLAN(t, s, s1)
	}
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 3))
		}
	})

	// Add one more server like we are doing a rolling update.
	dir4, s4 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()
	joinLAN(t, s1, s4)
	servers = append(servers, s4)
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 3))
		}
	})

	// Now kill one of the "old" nodes like we are doing a rolling update.
	s3.Shutdown()

	isVoter := func() bool {
		future := s1.raft.GetConfiguration()
		if err := future.Error(); err != nil {
			t.Fatalf("err: %v", err)
		}
		for _, s := range future.Configuration().Servers {
			if string(s.ID) == string(s4.config.NodeID) {
				return s.Suffrage == raft.Voter
			}
		}
		t.Fatalf("didn't find s4")
		return false
	}

	// Wait for s4 to stabilize, get promoted to a voter, and for s3 to be
	// removed.
	servers = []*Server{s1, s2, s4}
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 3))
		}
		if !isVoter() {
			r.Fatalf("should be a voter")
		}
	})
}

func TestAutopilot_CleanupStaleRaftServer(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerDCBootstrap(t, "dc1", true)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	dir4, s4 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Join the servers to s1
	for _, s := range servers[1:] {
		joinLAN(t, s, s1)
	}

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Add s4 to peers directly
	addVoterFuture := s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(joinAddrLAN(s4)), 0, 0)
	if err := addVoterFuture.Error(); err != nil {
		t.Fatal(err)
	}

	// Verify we have 4 peers
	peers, err := s1.numPeers()
	if err != nil {
		t.Fatal(err)
	}
	if peers != 4 {
		t.Fatalf("bad: %v", peers)
	}

	// Wait for s4 to be removed
	for _, s := range []*Server{s1, s2, s3} {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}
}

func TestAutopilot_PromoteNonVoter(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = true
		c.RaftConfig.ProtocolVersion = 3
		c.AutopilotConfig.ServerStabilizationTime = 200 * time.Millisecond
		c.ServerHealthInterval = 100 * time.Millisecond
		c.AutopilotInterval = 100 * time.Millisecond
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()
	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.Bootstrap = false
		c.RaftConfig.ProtocolVersion = 3
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()
	joinLAN(t, s2, s1)

	// Make sure we see it as a nonvoter initially. We wait until half
	// the stabilization period has passed.
	retry.Run(t, func(r *retry.R) {
		future := s1.raft.GetConfiguration()
		if err := future.Error(); err != nil {
			r.Fatal(err)
		}

		servers := future.Configuration().Servers
		if len(servers) != 2 {
			r.Fatalf("bad: %v", servers)
		}
		if servers[1].Suffrage != raft.Nonvoter {
			r.Fatalf("bad: %v", servers)
		}
		health := s1.autopilot.GetServerHealth(string(servers[1].ID))
		if health == nil {
			r.Fatal("nil health")
		}
		if !health.Healthy {
			r.Fatalf("bad: %v", health)
		}
		if time.Since(health.StableSince) < s1.config.AutopilotConfig.ServerStabilizationTime/2 {
			r.Fatal("stable period not elapsed")
		}
	})

	// Make sure it ends up as a voter.
	retry.Run(t, func(r *retry.R) {
		future := s1.raft.GetConfiguration()
		if err := future.Error(); err != nil {
			r.Fatal(err)
		}

		servers := future.Configuration().Servers
		if len(servers) != 2 {
			r.Fatalf("bad: %v", servers)
		}
		if servers[1].Suffrage != raft.Voter {
			r.Fatalf("bad: %v", servers)
		}
	})
}

// TestAutopilot_MinQuorum verifies that autopilot reaps a failed server while
// the cluster is above MinQuorum, but leaves a failed server alone once
// removing it would drop the cluster below MinQuorum.
func TestAutopilot_MinQuorum(t *testing.T) {
	dc := "dc1"
	conf := func(c *Config) {
		c.Datacenter = dc
		c.Bootstrap = false
		c.BootstrapExpect = 4
		c.AutopilotConfig.MinQuorum = 3
		c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(2)
		c.AutopilotInterval = 100 * time.Millisecond
	}
	dir1, s1 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	dir4, s4 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()

	servers := map[string]*Server{s1.config.NodeName: s1,
		s2.config.NodeName: s2,
		s3.config.NodeName: s3,
		s4.config.NodeName: s4}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)
	joinLAN(t, s4, s1)

	// Differentiate between the leader and a non-leader server
	findStatus := func(leader bool) *Server {
		for _, mem := range servers {
			if mem.IsLeader() == leader {
				return mem
			}
			if !mem.IsLeader() == !leader {
				return mem
			}
		}

		return nil
	}
	testrpc.WaitForLeader(t, s1.RPC, dc)

	// Have autopilot take one server into the left state
	dead := findStatus(false)
	if dead == nil {
		t.Fatalf("no members set")
	}
	require.NoError(t, dead.Shutdown())
	retry.Run(t, func(r *retry.R) {
		leader := findStatus(true)
		if leader == nil {
			r.Fatalf("no members set")
		}
		for _, m := range leader.LANMembers() {
			if m.Name == dead.config.NodeName && m.Status != serf.StatusLeft {
				r.Fatalf("%v should be left, got %v", m.Name, m.Status.String())
			}
		}
	})

	delete(servers, dead.config.NodeName)
	// Autopilot should not take this one into the left state
	dead = findStatus(false)
	require.NoError(t, dead.Shutdown())

	retry.Run(t, func(r *retry.R) {
		leader := findStatus(true)
		if leader == nil {
			r.Fatalf("no members set")
		}
		for _, m := range leader.LANMembers() {
			if m.Name == dead.config.NodeName && m.Status != serf.StatusFailed {
				r.Fatalf("%v should be failed, got %v", m.Name, m.Status.String())
			}
		}
	})
}