consul: Fixing graceful leave of current leader. Fixes #360.
This commit is contained in:
parent
b1d0611e0e
commit
b6c5d77cf8
|
@ -7,6 +7,7 @@ BUG FIXES:
|
|||
* Fixing issue with Session ID and ACL ID generation. Previously,
|
||||
sessions and tokens could be invalidated when the leader changed.
|
||||
* Fixing multiple headers for /v1/event/list endpoint [GH-361]
|
||||
* Fixing graceful leave of leader causing invalid Raft peers [GH-360]
|
||||
|
||||
## 0.4.0 (September 5, 2014)
|
||||
|
||||
|
|
|
@ -399,22 +399,30 @@ func (s *Server) handleReapMember(member serf.Member) error {
|
|||
// handleDeregisterMember is used to deregister a member of a given reason
|
||||
func (s *Server) handleDeregisterMember(reason string, member serf.Member) error {
|
||||
state := s.fsm.State()
|
||||
|
||||
// Check if the node does not exists
|
||||
_, found, _ := state.GetNode(member.Name)
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
s.logger.Printf("[INFO] consul: member '%s' %s, deregistering", member.Name, reason)
|
||||
|
||||
// Remove from Raft peers if this was a server
|
||||
if valid, parts := isConsulServer(member); valid {
|
||||
s.logger.Printf("[INFO] consul: server '%s' %s, removing as peer", member.Name, reason)
|
||||
if err := s.removeConsulServer(member, parts.Port); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Do not deregister ourself. This can only happen if the current leader
|
||||
// is leaving. Instead, we should allow a follower to take-over and
|
||||
// deregister us later.
|
||||
if member.Name == s.config.NodeName {
|
||||
s.logger.Printf("[WARN] consul: deregistering self (%s) should be done by follower", s.config.NodeName)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Deregister the node
|
||||
s.logger.Printf("[INFO] consul: member '%s' %s, deregistering", member.Name, reason)
|
||||
req := structs.DeregisterRequest{
|
||||
Datacenter: s.config.Datacenter,
|
||||
Node: member.Name,
|
||||
|
@ -454,11 +462,6 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error {
|
|||
|
||||
// removeConsulServer is used to try to remove a consul server that has left
|
||||
func (s *Server) removeConsulServer(m serf.Member, port int) error {
|
||||
// Do not remove ourself
|
||||
if m.Name == s.config.NodeName {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Attempt to remove as peer
|
||||
peer := &net.TCPAddr{IP: m.Addr, Port: port}
|
||||
future := s.raft.RemovePeer(peer)
|
||||
|
|
|
@ -3,12 +3,13 @@ package consul
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
func TestLeader_RegisterMember(t *testing.T) {
|
||||
|
@ -328,6 +329,73 @@ func TestLeader_LeftServer(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestLeader_LeftLeader(t *testing.T) {
|
||||
dir1, s1 := testServer(t)
|
||||
defer os.RemoveAll(dir1)
|
||||
defer s1.Shutdown()
|
||||
|
||||
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
||||
defer os.RemoveAll(dir2)
|
||||
defer s2.Shutdown()
|
||||
|
||||
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
||||
defer os.RemoveAll(dir3)
|
||||
defer s3.Shutdown()
|
||||
servers := []*Server{s1, s2, s3}
|
||||
|
||||
// Try to join
|
||||
addr := fmt.Sprintf("127.0.0.1:%d",
|
||||
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
||||
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
for _, s := range servers {
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
peers, _ := s.raftPeers.Peers()
|
||||
return len(peers) == 3, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("should have 3 peers")
|
||||
})
|
||||
}
|
||||
|
||||
// Kill the leader!
|
||||
var leader *Server
|
||||
for _, s := range servers {
|
||||
if s.IsLeader() {
|
||||
leader = s
|
||||
break
|
||||
}
|
||||
}
|
||||
leader.Leave()
|
||||
leader.Shutdown()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
var remain *Server
|
||||
for _, s := range servers {
|
||||
if s == leader {
|
||||
continue
|
||||
}
|
||||
remain = s
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
peers, _ := s.raftPeers.Peers()
|
||||
return len(peers) == 2, errors.New(fmt.Sprintf("%v", peers))
|
||||
}, func(err error) {
|
||||
t.Fatalf("should have 2 peers: %v", err)
|
||||
})
|
||||
}
|
||||
|
||||
// Verify the old leader is deregistered
|
||||
state := remain.fsm.State()
|
||||
_, found, _ := state.GetNode(leader.config.NodeName)
|
||||
if found {
|
||||
t.Fatalf("leader should be deregistered")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLeader_MultiBootstrap(t *testing.T) {
|
||||
dir1, s1 := testServer(t)
|
||||
defer os.RemoveAll(dir1)
|
||||
|
|
Loading…
Reference in New Issue