consul: Fixing graceful leave of current leader. Fixes #360.
This commit is contained in:
parent
b1d0611e0e
commit
b6c5d77cf8
|
@ -7,6 +7,7 @@ BUG FIXES:
|
||||||
* Fixing issue with Session ID and ACL ID generation. Previously,
|
* Fixing issue with Session ID and ACL ID generation. Previously,
|
||||||
sessions and tokens could be invalidated when the leader changed.
|
sessions and tokens could be invalidated when the leader changed.
|
||||||
* Fixing multiple headers for /v1/event/list endpoint [GH-361]
|
* Fixing multiple headers for /v1/event/list endpoint [GH-361]
|
||||||
|
* Fixing graceful leave of leader causing invalid Raft peers [GH-360]
|
||||||
|
|
||||||
## 0.4.0 (September 5, 2014)
|
## 0.4.0 (September 5, 2014)
|
||||||
|
|
||||||
|
|
|
@ -399,22 +399,30 @@ func (s *Server) handleReapMember(member serf.Member) error {
|
||||||
// handleDeregisterMember is used to deregister a member of a given reason
|
// handleDeregisterMember is used to deregister a member of a given reason
|
||||||
func (s *Server) handleDeregisterMember(reason string, member serf.Member) error {
|
func (s *Server) handleDeregisterMember(reason string, member serf.Member) error {
|
||||||
state := s.fsm.State()
|
state := s.fsm.State()
|
||||||
|
|
||||||
// Check if the node does not exists
|
// Check if the node does not exists
|
||||||
_, found, _ := state.GetNode(member.Name)
|
_, found, _ := state.GetNode(member.Name)
|
||||||
if !found {
|
if !found {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
s.logger.Printf("[INFO] consul: member '%s' %s, deregistering", member.Name, reason)
|
|
||||||
|
|
||||||
// Remove from Raft peers if this was a server
|
// Remove from Raft peers if this was a server
|
||||||
if valid, parts := isConsulServer(member); valid {
|
if valid, parts := isConsulServer(member); valid {
|
||||||
|
s.logger.Printf("[INFO] consul: server '%s' %s, removing as peer", member.Name, reason)
|
||||||
if err := s.removeConsulServer(member, parts.Port); err != nil {
|
if err := s.removeConsulServer(member, parts.Port); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do not deregister ourself. This can only happen if the current leader
|
||||||
|
// is leaving. Instead, we should allow a follower to take-over and
|
||||||
|
// deregister us later.
|
||||||
|
if member.Name == s.config.NodeName {
|
||||||
|
s.logger.Printf("[WARN] consul: deregistering self (%s) should be done by follower", s.config.NodeName)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Deregister the node
|
// Deregister the node
|
||||||
|
s.logger.Printf("[INFO] consul: member '%s' %s, deregistering", member.Name, reason)
|
||||||
req := structs.DeregisterRequest{
|
req := structs.DeregisterRequest{
|
||||||
Datacenter: s.config.Datacenter,
|
Datacenter: s.config.Datacenter,
|
||||||
Node: member.Name,
|
Node: member.Name,
|
||||||
|
@ -454,11 +462,6 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error {
|
||||||
|
|
||||||
// removeConsulServer is used to try to remove a consul server that has left
|
// removeConsulServer is used to try to remove a consul server that has left
|
||||||
func (s *Server) removeConsulServer(m serf.Member, port int) error {
|
func (s *Server) removeConsulServer(m serf.Member, port int) error {
|
||||||
// Do not remove ourself
|
|
||||||
if m.Name == s.config.NodeName {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to remove as peer
|
// Attempt to remove as peer
|
||||||
peer := &net.TCPAddr{IP: m.Addr, Port: port}
|
peer := &net.TCPAddr{IP: m.Addr, Port: port}
|
||||||
future := s.raft.RemovePeer(peer)
|
future := s.raft.RemovePeer(peer)
|
||||||
|
|
|
@ -3,12 +3,13 @@ package consul
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/hashicorp/consul/consul/structs"
|
|
||||||
"github.com/hashicorp/consul/testutil"
|
|
||||||
"github.com/hashicorp/serf/serf"
|
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/consul/structs"
|
||||||
|
"github.com/hashicorp/consul/testutil"
|
||||||
|
"github.com/hashicorp/serf/serf"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestLeader_RegisterMember(t *testing.T) {
|
func TestLeader_RegisterMember(t *testing.T) {
|
||||||
|
@ -328,6 +329,73 @@ func TestLeader_LeftServer(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLeader_LeftLeader(t *testing.T) {
|
||||||
|
dir1, s1 := testServer(t)
|
||||||
|
defer os.RemoveAll(dir1)
|
||||||
|
defer s1.Shutdown()
|
||||||
|
|
||||||
|
dir2, s2 := testServerDCBootstrap(t, "dc1", false)
|
||||||
|
defer os.RemoveAll(dir2)
|
||||||
|
defer s2.Shutdown()
|
||||||
|
|
||||||
|
dir3, s3 := testServerDCBootstrap(t, "dc1", false)
|
||||||
|
defer os.RemoveAll(dir3)
|
||||||
|
defer s3.Shutdown()
|
||||||
|
servers := []*Server{s1, s2, s3}
|
||||||
|
|
||||||
|
// Try to join
|
||||||
|
addr := fmt.Sprintf("127.0.0.1:%d",
|
||||||
|
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
|
||||||
|
if _, err := s2.JoinLAN([]string{addr}); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := s3.JoinLAN([]string{addr}); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range servers {
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
peers, _ := s.raftPeers.Peers()
|
||||||
|
return len(peers) == 3, nil
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("should have 3 peers")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kill the leader!
|
||||||
|
var leader *Server
|
||||||
|
for _, s := range servers {
|
||||||
|
if s.IsLeader() {
|
||||||
|
leader = s
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
leader.Leave()
|
||||||
|
leader.Shutdown()
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
var remain *Server
|
||||||
|
for _, s := range servers {
|
||||||
|
if s == leader {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
remain = s
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
peers, _ := s.raftPeers.Peers()
|
||||||
|
return len(peers) == 2, errors.New(fmt.Sprintf("%v", peers))
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("should have 2 peers: %v", err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the old leader is deregistered
|
||||||
|
state := remain.fsm.State()
|
||||||
|
_, found, _ := state.GetNode(leader.config.NodeName)
|
||||||
|
if found {
|
||||||
|
t.Fatalf("leader should be deregistered")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestLeader_MultiBootstrap(t *testing.T) {
|
func TestLeader_MultiBootstrap(t *testing.T) {
|
||||||
dir1, s1 := testServer(t)
|
dir1, s1 := testServer(t)
|
||||||
defer os.RemoveAll(dir1)
|
defer os.RemoveAll(dir1)
|
||||||
|
|
Loading…
Reference in New Issue