2013-12-07 01:18:09 +00:00
|
|
|
package consul
|
2013-12-07 00:54:33 +00:00
|
|
|
|
2013-12-09 23:29:44 +00:00
|
|
|
import (
|
2013-12-10 00:05:15 +00:00
|
|
|
"github.com/hashicorp/raft"
|
2013-12-09 23:29:44 +00:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2013-12-10 00:05:15 +00:00
|
|
|
"net"
|
|
|
|
"time"
|
2013-12-09 23:29:44 +00:00
|
|
|
)
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
// lanEventHandler is used to handle events from the lan Serf cluster
|
|
|
|
func (s *Server) lanEventHandler() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case e := <-s.eventChLAN:
|
2013-12-09 23:29:44 +00:00
|
|
|
switch e.EventType() {
|
|
|
|
case serf.EventMemberJoin:
|
|
|
|
s.localJoin(e.(serf.MemberEvent))
|
2014-01-09 23:49:09 +00:00
|
|
|
fallthrough
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventMemberLeave:
|
2014-01-09 23:49:09 +00:00
|
|
|
fallthrough
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventMemberFailed:
|
2014-01-09 23:49:09 +00:00
|
|
|
s.localMemberEvent(e.(serf.MemberEvent))
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventUser:
|
|
|
|
default:
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: unhandled LAN Serf Event: %#v", e)
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
case <-s.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// wanEventHandler is used to handle events from the wan Serf cluster
|
|
|
|
func (s *Server) wanEventHandler() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case e := <-s.eventChWAN:
|
2013-12-09 23:29:44 +00:00
|
|
|
switch e.EventType() {
|
|
|
|
case serf.EventMemberJoin:
|
|
|
|
s.remoteJoin(e.(serf.MemberEvent))
|
|
|
|
case serf.EventMemberLeave:
|
2013-12-12 00:24:34 +00:00
|
|
|
fallthrough
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventMemberFailed:
|
|
|
|
s.remoteFailed(e.(serf.MemberEvent))
|
|
|
|
case serf.EventUser:
|
|
|
|
default:
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: unhandled WAN Serf Event: %#v", e)
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
case <-s.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2014-01-09 23:49:09 +00:00
|
|
|
// localMemberEvent is used to reconcile Serf events with the strongly
|
|
|
|
// consistent store if we are the current leader
|
|
|
|
func (s *Server) localMemberEvent(me serf.MemberEvent) {
|
|
|
|
// Do nothing if we are not the leader
|
|
|
|
if !s.IsLeader() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dispatch an async handler for each member
|
|
|
|
for _, m := range me.Members {
|
|
|
|
go s.reconcileMember(m)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-09 23:29:44 +00:00
|
|
|
// localJoin is used to handle join events on the lan serf cluster
|
|
|
|
func (s *Server) localJoin(me serf.MemberEvent) {
|
2013-12-10 00:05:15 +00:00
|
|
|
// Check for consul members
|
|
|
|
for _, m := range me.Members {
|
2013-12-19 22:37:54 +00:00
|
|
|
ok, dc, port := isConsulServer(m)
|
2013-12-12 00:24:34 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if dc != s.config.Datacenter {
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: server %s for datacenter %s has joined wrong cluster",
|
2013-12-12 00:24:34 +00:00
|
|
|
m.Name, dc)
|
|
|
|
continue
|
2013-12-10 00:05:15 +00:00
|
|
|
}
|
2013-12-12 00:24:34 +00:00
|
|
|
go s.joinConsulServer(m, port)
|
2013-12-10 00:05:15 +00:00
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// remoteJoin is used to handle join events on the wan serf cluster
|
|
|
|
func (s *Server) remoteJoin(me serf.MemberEvent) {
|
2013-12-12 00:24:34 +00:00
|
|
|
for _, m := range me.Members {
|
2013-12-19 22:37:54 +00:00
|
|
|
ok, dc, port := isConsulServer(m)
|
2013-12-12 00:24:34 +00:00
|
|
|
if !ok {
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: non-server in WAN pool: %s %s", m.Name)
|
2013-12-12 00:24:34 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: port}
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[INFO] consul: adding server for datacenter: %s, addr: %s", dc, addr)
|
2013-12-12 00:24:34 +00:00
|
|
|
|
|
|
|
// Check if this server is known
|
|
|
|
found := false
|
|
|
|
s.remoteLock.Lock()
|
|
|
|
existing := s.remoteConsuls[dc]
|
|
|
|
for _, e := range existing {
|
|
|
|
if e.String() == addr.String() {
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2013-12-12 00:24:34 +00:00
|
|
|
// Add ot the list if not known
|
|
|
|
if !found {
|
|
|
|
s.remoteConsuls[dc] = append(existing, addr)
|
|
|
|
}
|
|
|
|
s.remoteLock.Unlock()
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// remoteFailed is used to handle fail events on the wan serf cluster
|
|
|
|
func (s *Server) remoteFailed(me serf.MemberEvent) {
|
2013-12-12 00:24:34 +00:00
|
|
|
for _, m := range me.Members {
|
2013-12-19 22:37:54 +00:00
|
|
|
ok, dc, port := isConsulServer(m)
|
2013-12-12 00:24:34 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: port}
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[INFO] consul: removing server for datacenter: %s, addr: %s", dc, addr)
|
2013-12-12 00:24:34 +00:00
|
|
|
|
|
|
|
// Remove the server if known
|
|
|
|
s.remoteLock.Lock()
|
|
|
|
existing := s.remoteConsuls[dc]
|
|
|
|
n := len(existing)
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
if existing[i].String() == addr.String() {
|
|
|
|
existing[i], existing[n-1] = existing[n-1], nil
|
|
|
|
existing = existing[:n-1]
|
|
|
|
n--
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2013-12-12 00:24:34 +00:00
|
|
|
// Trim the list if all known consuls are dead
|
|
|
|
if n == 0 {
|
|
|
|
delete(s.remoteConsuls, dc)
|
|
|
|
} else {
|
|
|
|
s.remoteConsuls[dc] = existing
|
|
|
|
}
|
|
|
|
s.remoteLock.Unlock()
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
2013-12-10 00:05:15 +00:00
|
|
|
|
|
|
|
// joinConsulServer is used to try to join another consul server
|
|
|
|
func (s *Server) joinConsulServer(m serf.Member, port int) {
|
|
|
|
if m.Name == s.config.NodeName {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: port}
|
|
|
|
var future raft.Future
|
|
|
|
|
|
|
|
CHECK:
|
|
|
|
// Get the Raft peers
|
|
|
|
peers, err := s.raftPeers.Peers()
|
|
|
|
if err != nil {
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[ERR] consul: failed to get raft peers: %v", err)
|
2013-12-10 00:05:15 +00:00
|
|
|
goto WAIT
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bail if this node is already a peer
|
|
|
|
for _, p := range peers {
|
|
|
|
if p.String() == addr.String() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-10 00:25:24 +00:00
|
|
|
// Bail if the node is not alive
|
|
|
|
if memberStatus(s.serfLAN.Members(), m.Name) != serf.StatusAlive {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2013-12-10 00:05:15 +00:00
|
|
|
// Attempt to add as a peer
|
|
|
|
future = s.raft.AddPeer(addr)
|
|
|
|
if err := future.Error(); err != nil {
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err)
|
2013-12-10 00:05:15 +00:00
|
|
|
} else {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
WAIT:
|
|
|
|
time.Sleep(500 * time.Millisecond)
|
2013-12-10 00:25:24 +00:00
|
|
|
select {
|
|
|
|
case <-s.shutdownCh:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
goto CHECK
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// memberStatus scans a list of members for a matching one,
|
|
|
|
// returning the status or StatusNone
|
|
|
|
func memberStatus(members []serf.Member, name string) serf.MemberStatus {
|
|
|
|
for _, m := range members {
|
|
|
|
if m.Name == name {
|
|
|
|
return m.Status
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return serf.StatusNone
|
2013-12-10 00:05:15 +00:00
|
|
|
}
|