424d70ba5d
Ended up removing the leader_test.go server address change test as part of this. The join was failing becase we were using a new node name with the new logic here, but realized this was hitting some of the memberlist conflict logic and not working as we expected. We need some additional work to fully support address changes, so removed the test for now.
933 lines
22 KiB
Go
933 lines
22 KiB
Go
package consul
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/consul/structs"
|
|
"github.com/hashicorp/consul/testutil"
|
|
"github.com/hashicorp/net-rpc-msgpackrpc"
|
|
"github.com/hashicorp/serf/serf"
|
|
)
|
|
|
|
func TestLeader_RegisterMember(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
|
|
|
// Client should be registered
|
|
state := s1.fsm.State()
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node != nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client not registered")
|
|
}
|
|
|
|
// Should have a check
|
|
_, checks, err := state.NodeChecks(nil, c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if len(checks) != 1 {
|
|
t.Fatalf("client missing check")
|
|
}
|
|
if checks[0].CheckID != SerfCheckID {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Name != SerfCheckName {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Status != structs.HealthPassing {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
|
|
// Server should be registered
|
|
_, node, err := state.GetNode(s1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node == nil {
|
|
t.Fatalf("server not registered")
|
|
}
|
|
|
|
// Service should be registered
|
|
_, services, err := state.NodeServices(nil, s1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, ok := services.Services["consul"]; !ok {
|
|
t.Fatalf("consul service not registered: %v", services)
|
|
}
|
|
}
|
|
|
|
func TestLeader_FailedMember(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Fail the member
|
|
c1.Shutdown()
|
|
|
|
// Should be registered
|
|
state := s1.fsm.State()
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node != nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client not registered")
|
|
}
|
|
|
|
// Should have a check
|
|
_, checks, err := state.NodeChecks(nil, c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if len(checks) != 1 {
|
|
t.Fatalf("client missing check")
|
|
}
|
|
if checks[0].CheckID != SerfCheckID {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Name != SerfCheckName {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, checks, err = state.NodeChecks(nil, c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return checks[0].Status == structs.HealthCritical, errors.New(checks[0].Status)
|
|
}); err != nil {
|
|
t.Fatalf("check status is %v, should be critical", err)
|
|
}
|
|
}
|
|
|
|
func TestLeader_LeftMember(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
state := s1.fsm.State()
|
|
|
|
// Should be registered
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node != nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client should be registered")
|
|
}
|
|
|
|
// Node should leave
|
|
c1.Leave()
|
|
c1.Shutdown()
|
|
|
|
// Should be deregistered
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node == nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client should not be registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_ReapMember(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
state := s1.fsm.State()
|
|
|
|
// Should be registered
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node != nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client should be registered")
|
|
}
|
|
|
|
// Simulate a node reaping
|
|
mems := s1.LANMembers()
|
|
var c1mem serf.Member
|
|
for _, m := range mems {
|
|
if m.Name == c1.config.NodeName {
|
|
c1mem = m
|
|
c1mem.Status = StatusReap
|
|
break
|
|
}
|
|
}
|
|
s1.reconcileCh <- c1mem
|
|
|
|
// Should be deregistered; we have to poll quickly here because
|
|
// anti-entropy will put it back.
|
|
reaped := false
|
|
for start := time.Now(); time.Since(start) < 5*time.Second; {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node == nil {
|
|
reaped = true
|
|
break
|
|
}
|
|
}
|
|
if !reaped {
|
|
t.Fatalf("client should not be registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_Reconcile_ReapMember(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
|
|
|
// Register a non-existing member
|
|
dead := structs.RegisterRequest{
|
|
Datacenter: s1.config.Datacenter,
|
|
Node: "no-longer-around",
|
|
Address: "127.1.1.1",
|
|
Check: &structs.HealthCheck{
|
|
Node: "no-longer-around",
|
|
CheckID: SerfCheckID,
|
|
Name: SerfCheckName,
|
|
Status: structs.HealthCritical,
|
|
},
|
|
WriteRequest: structs.WriteRequest{
|
|
Token: "root",
|
|
},
|
|
}
|
|
var out struct{}
|
|
if err := s1.RPC("Catalog.Register", &dead, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Force a reconciliation
|
|
if err := s1.reconcile(); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Node should be gone
|
|
state := s1.fsm.State()
|
|
_, node, err := state.GetNode("no-longer-around")
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node != nil {
|
|
t.Fatalf("client registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_Reconcile(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.ACLEnforceVersion8 = true
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Join before we have a leader, this should cause a reconcile!
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Should not be registered
|
|
state := s1.fsm.State()
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node != nil {
|
|
t.Fatalf("client registered")
|
|
}
|
|
|
|
// Should be registered
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err = state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node != nil, nil
|
|
}); err != nil {
|
|
t.Fatal("client should be registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_Reconcile_Races(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait for the server to reconcile the client and register it.
|
|
state := s1.fsm.State()
|
|
var nodeAddr string
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node != nil {
|
|
nodeAddr = node.Address
|
|
return true, nil
|
|
} else {
|
|
return false, nil
|
|
}
|
|
}); err != nil {
|
|
t.Fatalf("client should be registered: %v", err)
|
|
}
|
|
|
|
// Add in some metadata via the catalog (as if the agent synced it
|
|
// there). We also set the serfHealth check to failing so the reconile
|
|
// will attempt to flip it back
|
|
req := structs.RegisterRequest{
|
|
Datacenter: s1.config.Datacenter,
|
|
Node: c1.config.NodeName,
|
|
ID: c1.config.NodeID,
|
|
Address: nodeAddr,
|
|
NodeMeta: map[string]string{"hello": "world"},
|
|
Check: &structs.HealthCheck{
|
|
Node: c1.config.NodeName,
|
|
CheckID: SerfCheckID,
|
|
Name: SerfCheckName,
|
|
Status: structs.HealthCritical,
|
|
Output: "",
|
|
},
|
|
}
|
|
var out struct{}
|
|
if err := s1.RPC("Catalog.Register", &req, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Force a reconcile and make sure the metadata stuck around.
|
|
if err := s1.reconcile(); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
_, node, err := state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node == nil {
|
|
t.Fatalf("bad")
|
|
}
|
|
if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
|
|
t.Fatalf("bad")
|
|
}
|
|
|
|
// Fail the member and wait for the health to go critical.
|
|
c1.Shutdown()
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, checks, err := state.NodeChecks(nil, c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return checks[0].Status == structs.HealthCritical, errors.New(checks[0].Status)
|
|
}); err != nil {
|
|
t.Fatalf("check status should be critical: %v", err)
|
|
}
|
|
|
|
// Make sure the metadata didn't get clobbered.
|
|
_, node, err = state.GetNode(c1.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if node == nil {
|
|
t.Fatalf("bad")
|
|
}
|
|
if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
|
|
t.Fatalf("bad")
|
|
}
|
|
}
|
|
|
|
func TestLeader_LeftServer(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
// Kill any server
|
|
servers[0].Shutdown()
|
|
|
|
// Force remove the non-leader (transition to left state)
|
|
if err := servers[1].RemoveFailedNode(servers[0].config.NodeName); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers[1:] {
|
|
peers, _ := s.numPeers()
|
|
return peers == 2, errors.New(fmt.Sprintf("%d", peers))
|
|
}
|
|
|
|
return true, nil
|
|
}); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
|
|
func TestLeader_LeftLeader(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
|
|
// Kill the leader!
|
|
var leader *Server
|
|
for _, s := range servers {
|
|
if s.IsLeader() {
|
|
leader = s
|
|
break
|
|
}
|
|
}
|
|
if leader == nil {
|
|
t.Fatalf("Should have a leader")
|
|
}
|
|
leader.Leave()
|
|
leader.Shutdown()
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
var remain *Server
|
|
for _, s := range servers {
|
|
if s == leader {
|
|
continue
|
|
}
|
|
remain = s
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 2, errors.New(fmt.Sprintf("%d", peers))
|
|
}); err != nil {
|
|
t.Fatal("should have 2 peers")
|
|
}
|
|
}
|
|
|
|
// Verify the old leader is deregistered
|
|
state := remain.fsm.State()
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
_, node, err := state.GetNode(leader.config.NodeName)
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return node == nil, nil
|
|
}); err != nil {
|
|
t.Fatal("should be deregistered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_MultiBootstrap(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServer(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
servers := []*Server{s1, s2}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers := s.serfLAN.Members()
|
|
return len(peers) == 2, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 2 peerss")
|
|
}
|
|
}
|
|
|
|
// Ensure we don't have multiple raft peers
|
|
for _, s := range servers {
|
|
peers, _ := s.numPeers()
|
|
if peers != 1 {
|
|
t.Fatalf("should only have 1 raft peer!")
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestLeader_TombstoneGC_Reset(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
|
|
var leader *Server
|
|
for _, s := range servers {
|
|
if s.IsLeader() {
|
|
leader = s
|
|
break
|
|
}
|
|
}
|
|
if leader == nil {
|
|
t.Fatalf("Should have a leader")
|
|
}
|
|
|
|
// Check that the leader has a pending GC expiration
|
|
if !leader.tombstoneGC.PendingExpiration() {
|
|
t.Fatalf("should have pending expiration")
|
|
}
|
|
|
|
// Kill the leader
|
|
leader.Shutdown()
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Wait for a new leader
|
|
leader = nil
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
for _, s := range servers {
|
|
if s.IsLeader() {
|
|
leader = s
|
|
return true, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}); err != nil {
|
|
t.Fatal("should have leader")
|
|
}
|
|
|
|
// Check that the new leader has a pending GC expiration
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
return leader.tombstoneGC.PendingExpiration(), nil
|
|
}); err != nil {
|
|
t.Fatal("should have pending expiration")
|
|
}
|
|
}
|
|
|
|
func TestLeader_ReapTombstones(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.ACLDatacenter = "dc1"
|
|
c.ACLMasterToken = "root"
|
|
c.ACLDefaultPolicy = "deny"
|
|
c.TombstoneTTL = 50 * time.Millisecond
|
|
c.TombstoneTTLGranularity = 10 * time.Millisecond
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
codec := rpcClient(t, s1)
|
|
|
|
testutil.WaitForLeader(t, s1.RPC, "dc1")
|
|
|
|
// Create a KV entry
|
|
arg := structs.KVSRequest{
|
|
Datacenter: "dc1",
|
|
Op: structs.KVSSet,
|
|
DirEnt: structs.DirEntry{
|
|
Key: "test",
|
|
Value: []byte("test"),
|
|
},
|
|
WriteRequest: structs.WriteRequest{
|
|
Token: "root",
|
|
},
|
|
}
|
|
var out bool
|
|
if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &arg, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Delete the KV entry (tombstoned).
|
|
arg.Op = structs.KVSDelete
|
|
if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &arg, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Make sure there's a tombstone.
|
|
state := s1.fsm.State()
|
|
func() {
|
|
snap := state.Snapshot()
|
|
defer snap.Close()
|
|
stones, err := snap.Tombstones()
|
|
if err != nil {
|
|
t.Fatalf("err: %s", err)
|
|
}
|
|
if stones.Next() == nil {
|
|
t.Fatalf("missing tombstones")
|
|
}
|
|
if stones.Next() != nil {
|
|
t.Fatalf("unexpected extra tombstones")
|
|
}
|
|
}()
|
|
|
|
// Check that the new leader has a pending GC expiration by
|
|
// watching for the tombstone to get removed.
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
snap := state.Snapshot()
|
|
defer snap.Close()
|
|
stones, err := snap.Tombstones()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return stones.Next() == nil, nil
|
|
}); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
|
|
func TestLeader_RollRaftServer(t *testing.T) {
|
|
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
|
c.Bootstrap = true
|
|
c.Datacenter = "dc1"
|
|
})
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerWithConfig(t, func(c *Config) {
|
|
c.Bootstrap = false
|
|
c.Datacenter = "dc1"
|
|
c.RaftConfig.ProtocolVersion = 1
|
|
})
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
|
|
// Kill the v1 server
|
|
s2.Shutdown()
|
|
|
|
for _, s := range []*Server{s1, s3} {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
minVer, err := ServerMinRaftProtocol(s.LANMembers())
|
|
return minVer == 2, err
|
|
}); err != nil {
|
|
t.Fatalf("minimum protocol version among servers should be 2")
|
|
}
|
|
}
|
|
|
|
// Replace the dead server with one running raft protocol v3
|
|
dir4, s4 := testServerWithConfig(t, func(c *Config) {
|
|
c.Bootstrap = false
|
|
c.Datacenter = "dc1"
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
})
|
|
defer os.RemoveAll(dir4)
|
|
defer s4.Shutdown()
|
|
if _, err := s4.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
servers[1] = s4
|
|
|
|
// Make sure the dead server is removed and we're back to 3 total peers
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
addrs := 0
|
|
ids := 0
|
|
future := s.raft.GetConfiguration()
|
|
if err := future.Error(); err != nil {
|
|
return false, err
|
|
}
|
|
for _, server := range future.Configuration().Servers {
|
|
if string(server.ID) == string(server.Address) {
|
|
addrs++
|
|
} else {
|
|
ids++
|
|
}
|
|
}
|
|
return addrs == 2 && ids == 1, nil
|
|
}); err != nil {
|
|
t.Fatalf("should see 2 legacy IDs and 1 GUID")
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestLeader_ChangeServerID(t *testing.T) {
|
|
conf := func(c *Config) {
|
|
c.Bootstrap = false
|
|
c.BootstrapExpect = 3
|
|
c.Datacenter = "dc1"
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
}
|
|
dir1, s1 := testServerWithConfig(t, conf)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerWithConfig(t, conf)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerWithConfig(t, conf)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
|
|
// Shut down a server, freeing up its address/port
|
|
s3.Shutdown()
|
|
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
alive := 0
|
|
for _, m := range s1.LANMembers() {
|
|
if m.Status == serf.StatusAlive {
|
|
alive++
|
|
}
|
|
}
|
|
return alive == 2, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 2 alive members")
|
|
}
|
|
|
|
// Bring up a new server with s3's address that will get a different ID
|
|
dir4, s4 := testServerWithConfig(t, func(c *Config) {
|
|
c.Bootstrap = false
|
|
c.BootstrapExpect = 3
|
|
c.Datacenter = "dc1"
|
|
c.RaftConfig.ProtocolVersion = 3
|
|
c.SerfLANConfig.MemberlistConfig = s3.config.SerfLANConfig.MemberlistConfig
|
|
c.RPCAddr = s3.config.RPCAddr
|
|
c.RPCAdvertise = s3.config.RPCAdvertise
|
|
})
|
|
defer os.RemoveAll(dir4)
|
|
defer s4.Shutdown()
|
|
if _, err := s4.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
servers[2] = s4
|
|
|
|
// Make sure the dead server is removed and we're back to 3 total peers
|
|
for _, s := range servers {
|
|
if err := testutil.WaitForResult(func() (bool, error) {
|
|
peers, _ := s.numPeers()
|
|
return peers == 3, nil
|
|
}); err != nil {
|
|
t.Fatal("should have 3 peers")
|
|
}
|
|
}
|
|
}
|