396 lines
8.7 KiB
Go
396 lines
8.7 KiB
Go
package consul
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/hashicorp/consul/testutil"
|
|
"github.com/hashicorp/consul/consul/structs"
|
|
"github.com/hashicorp/serf/serf"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestLeader_RegisterMember(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
client := rpcClient(t, s1)
|
|
testutil.WaitForLeader(t, client.Call, "dc1")
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait for registration
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Client should be registered
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode(c1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("client not registered")
|
|
}
|
|
|
|
// Should have a check
|
|
_, checks := state.NodeChecks(c1.config.NodeName)
|
|
if len(checks) != 1 {
|
|
t.Fatalf("client missing check")
|
|
}
|
|
if checks[0].CheckID != SerfCheckID {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Name != SerfCheckName {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Status != structs.HealthPassing {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
|
|
// Server should be registered
|
|
_, found, _ = state.GetNode(s1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("server not registered")
|
|
}
|
|
|
|
// Service should be registered
|
|
_, services := state.NodeServices(s1.config.NodeName)
|
|
if _, ok := services.Services["consul"]; !ok {
|
|
t.Fatalf("consul service not registered: %v", services)
|
|
}
|
|
}
|
|
|
|
func TestLeader_FailedMember(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Wait until we have a leader
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Fail the member
|
|
c1.Shutdown()
|
|
|
|
// Wait for failure detection
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
// Should be registered
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode(c1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("client not registered")
|
|
}
|
|
|
|
// Should have a check
|
|
_, checks := state.NodeChecks(c1.config.NodeName)
|
|
if len(checks) != 1 {
|
|
t.Fatalf("client missing check")
|
|
}
|
|
if checks[0].CheckID != SerfCheckID {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Name != SerfCheckName {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
if checks[0].Status != structs.HealthCritical {
|
|
t.Fatalf("bad check: %v", checks[0])
|
|
}
|
|
}
|
|
|
|
func TestLeader_LeftMember(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Wait until we have a leader
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait for registration
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Should be registered
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode(c1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("client not registered")
|
|
}
|
|
|
|
// Node should leave
|
|
c1.Leave()
|
|
c1.Shutdown()
|
|
|
|
// Wait for failure detection
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
// Should be deregistered
|
|
_, found, _ = state.GetNode(c1.config.NodeName)
|
|
if found {
|
|
t.Fatalf("client registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_ReapMember(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Wait until we have a leader
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait for registration
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Should be registered
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode(c1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("client not registered")
|
|
}
|
|
|
|
// Simulate a node reaping
|
|
mems := s1.LANMembers()
|
|
var c1mem serf.Member
|
|
for _, m := range mems {
|
|
if m.Name == c1.config.NodeName {
|
|
c1mem = m
|
|
c1mem.Status = StatusReap
|
|
break
|
|
}
|
|
}
|
|
s1.reconcileCh <- c1mem
|
|
|
|
// Wait to reconcile
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Should be deregistered
|
|
_, found, _ = state.GetNode(c1.config.NodeName)
|
|
if found {
|
|
t.Fatalf("client registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_Reconcile_ReapMember(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
// Wait until we have a leader
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Register a non-existing member
|
|
dead := structs.RegisterRequest{
|
|
Datacenter: s1.config.Datacenter,
|
|
Node: "no-longer-around",
|
|
Address: "127.1.1.1",
|
|
Check: &structs.HealthCheck{
|
|
Node: "no-longer-around",
|
|
CheckID: SerfCheckID,
|
|
Name: SerfCheckName,
|
|
Status: structs.HealthCritical,
|
|
},
|
|
}
|
|
var out struct{}
|
|
if err := s1.RPC("Catalog.Register", &dead, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Force a reconciliation
|
|
if err := s1.reconcile(); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Node should be gone
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode("no-longer-around")
|
|
if found {
|
|
t.Fatalf("client registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_Reconcile(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, c1 := testClient(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer c1.Shutdown()
|
|
|
|
// Join before we have a leader, this should cause a reconcile!
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := c1.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Should not be registered
|
|
state := s1.fsm.State()
|
|
_, found, _ := state.GetNode(c1.config.NodeName)
|
|
if found {
|
|
t.Fatalf("client registered")
|
|
}
|
|
|
|
// Wait for leader
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Should be registered
|
|
_, found, _ = state.GetNode(c1.config.NodeName)
|
|
if !found {
|
|
t.Fatalf("client not registered")
|
|
}
|
|
}
|
|
|
|
func TestLeader_LeftServer(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
|
defer os.RemoveAll(dir3)
|
|
defer s3.Shutdown()
|
|
servers := []*Server{s1, s2, s3}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait until we have 3 peers
|
|
start := time.Now()
|
|
CHECK1:
|
|
for _, s := range servers {
|
|
peers, _ := s.raftPeers.Peers()
|
|
if len(peers) != 3 {
|
|
if time.Now().Sub(start) >= 2*time.Second {
|
|
t.Fatalf("should have 3 peers")
|
|
} else {
|
|
time.Sleep(100 * time.Millisecond)
|
|
goto CHECK1
|
|
}
|
|
}
|
|
}
|
|
|
|
// Kill any server
|
|
servers[0].Shutdown()
|
|
|
|
// Wait for failure detection
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
// Force remove the non-leader (transition to left state)
|
|
if err := servers[1].RemoveFailedNode(servers[0].config.NodeName); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait for intent propagation
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
// Wait until we have 2 peers
|
|
start = time.Now()
|
|
CHECK2:
|
|
for _, s := range servers[1:] {
|
|
peers, _ := s.raftPeers.Peers()
|
|
if len(peers) != 2 {
|
|
if time.Now().Sub(start) >= 2*time.Second {
|
|
t.Fatalf("should have 2 peers")
|
|
} else {
|
|
time.Sleep(100 * time.Millisecond)
|
|
goto CHECK2
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestLeader_MultiBootstrap(t *testing.T) {
|
|
dir1, s1 := testServer(t)
|
|
defer os.RemoveAll(dir1)
|
|
defer s1.Shutdown()
|
|
|
|
dir2, s2 := testServer(t)
|
|
defer os.RemoveAll(dir2)
|
|
defer s2.Shutdown()
|
|
|
|
servers := []*Server{s1, s2}
|
|
|
|
// Try to join
|
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
|
|
// Wait until we have 2 peers
|
|
start := time.Now()
|
|
CHECK1:
|
|
for _, s := range servers {
|
|
peers := s.serfLAN.Members()
|
|
if len(peers) != 2 {
|
|
if time.Now().Sub(start) >= 2*time.Second {
|
|
t.Fatalf("should have 2 peers")
|
|
} else {
|
|
time.Sleep(100 * time.Millisecond)
|
|
goto CHECK1
|
|
}
|
|
}
|
|
}
|
|
|
|
// Wait to ensure no peer is added
|
|
time.Sleep(200 * time.Millisecond)
|
|
|
|
// Ensure we don't have multiple raft peers
|
|
for _, s := range servers {
|
|
peers, _ := s.raftPeers.Peers()
|
|
if len(peers) != 1 {
|
|
t.Fatalf("should only have 1 raft peer!")
|
|
}
|
|
}
|
|
}
|