2013-12-07 01:18:09 +00:00
|
|
|
package consul
|
2013-12-07 00:54:33 +00:00
|
|
|
|
2013-12-09 23:29:44 +00:00
|
|
|
import (
|
2014-06-18 16:03:30 +00:00
|
|
|
"net"
|
2014-02-19 20:36:27 +00:00
|
|
|
"strings"
|
2014-06-18 16:03:30 +00:00
|
|
|
|
2016-03-30 00:39:19 +00:00
|
|
|
"github.com/hashicorp/consul/consul/agent"
|
2014-06-18 16:03:30 +00:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2013-12-09 23:29:44 +00:00
|
|
|
)
|
|
|
|
|
2014-03-20 19:51:49 +00:00
|
|
|
const (
|
|
|
|
// StatusReap is used to update the status of a node if we
|
|
|
|
// are handling a EventMemberReap
|
|
|
|
StatusReap = serf.MemberStatus(-1)
|
2014-08-27 02:04:07 +00:00
|
|
|
|
|
|
|
// userEventPrefix is pre-pended to a user event to distinguish it
|
|
|
|
userEventPrefix = "consul:event:"
|
2014-03-20 19:51:49 +00:00
|
|
|
)
|
|
|
|
|
2014-08-27 01:50:03 +00:00
|
|
|
// userEventName computes the name of a user event
|
|
|
|
func userEventName(name string) string {
|
2014-08-27 02:04:07 +00:00
|
|
|
return userEventPrefix + name
|
|
|
|
}
|
|
|
|
|
|
|
|
// isUserEvent checks if a serf event is a user event
|
|
|
|
func isUserEvent(name string) bool {
|
|
|
|
return strings.HasPrefix(name, userEventPrefix)
|
|
|
|
}
|
|
|
|
|
|
|
|
// rawUserEventName is used to get the raw user event name
|
|
|
|
func rawUserEventName(name string) string {
|
|
|
|
return strings.TrimPrefix(name, userEventPrefix)
|
2014-08-27 01:50:03 +00:00
|
|
|
}
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
// lanEventHandler is used to handle events from the lan Serf cluster
|
|
|
|
func (s *Server) lanEventHandler() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case e := <-s.eventChLAN:
|
2013-12-09 23:29:44 +00:00
|
|
|
switch e.EventType() {
|
|
|
|
case serf.EventMemberJoin:
|
2015-07-29 23:33:25 +00:00
|
|
|
s.lanNodeJoin(e.(serf.MemberEvent))
|
2014-05-27 22:16:07 +00:00
|
|
|
s.localMemberEvent(e.(serf.MemberEvent))
|
|
|
|
|
2015-05-27 01:30:14 +00:00
|
|
|
case serf.EventMemberLeave, serf.EventMemberFailed:
|
2015-07-29 23:33:25 +00:00
|
|
|
s.lanNodeFailed(e.(serf.MemberEvent))
|
2014-05-27 22:16:07 +00:00
|
|
|
s.localMemberEvent(e.(serf.MemberEvent))
|
|
|
|
|
2014-03-20 19:51:49 +00:00
|
|
|
case serf.EventMemberReap:
|
2014-01-09 23:49:09 +00:00
|
|
|
s.localMemberEvent(e.(serf.MemberEvent))
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventUser:
|
2014-02-19 20:36:27 +00:00
|
|
|
s.localEvent(e.(serf.UserEvent))
|
2014-03-20 19:51:49 +00:00
|
|
|
case serf.EventMemberUpdate: // Ignore
|
2014-03-12 19:46:14 +00:00
|
|
|
case serf.EventQuery: // Ignore
|
2013-12-09 23:29:44 +00:00
|
|
|
default:
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: unhandled LAN Serf Event: %#v", e)
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
case <-s.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// wanEventHandler is used to handle events from the wan Serf cluster
|
|
|
|
func (s *Server) wanEventHandler() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case e := <-s.eventChWAN:
|
2013-12-09 23:29:44 +00:00
|
|
|
switch e.EventType() {
|
|
|
|
case serf.EventMemberJoin:
|
2015-07-29 23:33:25 +00:00
|
|
|
s.wanNodeJoin(e.(serf.MemberEvent))
|
2015-05-27 01:30:14 +00:00
|
|
|
case serf.EventMemberLeave, serf.EventMemberFailed:
|
2015-07-29 23:33:25 +00:00
|
|
|
s.wanNodeFailed(e.(serf.MemberEvent))
|
2014-03-12 19:46:14 +00:00
|
|
|
case serf.EventMemberUpdate: // Ignore
|
|
|
|
case serf.EventMemberReap: // Ignore
|
2013-12-09 23:29:44 +00:00
|
|
|
case serf.EventUser:
|
2014-03-12 19:46:14 +00:00
|
|
|
case serf.EventQuery: // Ignore
|
2013-12-09 23:29:44 +00:00
|
|
|
default:
|
2014-01-10 19:06:11 +00:00
|
|
|
s.logger.Printf("[WARN] consul: unhandled WAN Serf Event: %#v", e)
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2013-12-07 00:54:33 +00:00
|
|
|
case <-s.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2014-01-09 23:49:09 +00:00
|
|
|
// localMemberEvent is used to reconcile Serf events with the strongly
|
|
|
|
// consistent store if we are the current leader
|
|
|
|
func (s *Server) localMemberEvent(me serf.MemberEvent) {
|
|
|
|
// Do nothing if we are not the leader
|
|
|
|
if !s.IsLeader() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-03-20 19:51:49 +00:00
|
|
|
// Check if this is a reap event
|
|
|
|
isReap := me.EventType() == serf.EventMemberReap
|
|
|
|
|
2014-01-10 20:55:55 +00:00
|
|
|
// Queue the members for reconciliation
|
2014-01-09 23:49:09 +00:00
|
|
|
for _, m := range me.Members {
|
2014-03-20 19:51:49 +00:00
|
|
|
// Change the status if this is a reap event
|
|
|
|
if isReap {
|
|
|
|
m.Status = StatusReap
|
|
|
|
}
|
2014-01-10 20:55:55 +00:00
|
|
|
select {
|
|
|
|
case s.reconcileCh <- m:
|
|
|
|
default:
|
2013-12-10 00:05:15 +00:00
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2014-02-19 20:36:27 +00:00
|
|
|
// localEvent is called when we receive an event on the local Serf
|
|
|
|
func (s *Server) localEvent(event serf.UserEvent) {
|
|
|
|
// Handle only consul events
|
|
|
|
if !strings.HasPrefix(event.Name, "consul:") {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-08-27 02:04:07 +00:00
|
|
|
switch name := event.Name; {
|
|
|
|
case name == newLeaderEvent:
|
2014-02-19 20:36:27 +00:00
|
|
|
s.logger.Printf("[INFO] consul: New leader elected: %s", event.Payload)
|
|
|
|
|
|
|
|
// Trigger the callback
|
|
|
|
if s.config.ServerUp != nil {
|
|
|
|
s.config.ServerUp()
|
|
|
|
}
|
2014-08-27 02:04:07 +00:00
|
|
|
case isUserEvent(name):
|
|
|
|
event.Name = rawUserEventName(name)
|
|
|
|
s.logger.Printf("[DEBUG] consul: user event: %s", event.Name)
|
|
|
|
|
|
|
|
// Trigger the callback
|
|
|
|
if s.config.UserEventHandler != nil {
|
|
|
|
s.config.UserEventHandler(event)
|
|
|
|
}
|
2014-02-19 20:36:27 +00:00
|
|
|
default:
|
|
|
|
s.logger.Printf("[WARN] consul: Unhandled local event: %v", event)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-29 23:33:25 +00:00
|
|
|
// lanNodeJoin is used to handle join events on the LAN pool.
|
|
|
|
func (s *Server) lanNodeJoin(me serf.MemberEvent) {
|
2013-12-12 00:24:34 +00:00
|
|
|
for _, m := range me.Members {
|
2016-03-30 00:39:19 +00:00
|
|
|
ok, parts := agent.IsConsulServer(m)
|
2013-12-12 00:24:34 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
2015-07-29 23:33:25 +00:00
|
|
|
s.logger.Printf("[INFO] consul: adding LAN server %s", parts)
|
|
|
|
|
|
|
|
// See if it's configured as part of our DC.
|
|
|
|
if parts.Datacenter == s.config.Datacenter {
|
|
|
|
s.localLock.Lock()
|
|
|
|
s.localConsuls[parts.Addr.String()] = parts
|
|
|
|
s.localLock.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we still expecting to bootstrap, may need to handle this.
|
|
|
|
if s.config.BootstrapExpect != 0 {
|
|
|
|
s.maybeBootstrap()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// wanNodeJoin is used to handle join events on the WAN pool.
|
|
|
|
func (s *Server) wanNodeJoin(me serf.MemberEvent) {
|
|
|
|
for _, m := range me.Members {
|
2016-03-30 00:39:19 +00:00
|
|
|
ok, parts := agent.IsConsulServer(m)
|
2015-07-29 23:33:25 +00:00
|
|
|
if !ok {
|
|
|
|
s.logger.Printf("[WARN] consul: non-server in WAN pool: %s", m.Name)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
s.logger.Printf("[INFO] consul: adding WAN server %s", parts)
|
2013-12-12 00:24:34 +00:00
|
|
|
|
2015-07-29 23:33:25 +00:00
|
|
|
// Search for this node in our existing remotes.
|
2013-12-12 00:24:34 +00:00
|
|
|
found := false
|
|
|
|
s.remoteLock.Lock()
|
2014-01-20 23:39:07 +00:00
|
|
|
existing := s.remoteConsuls[parts.Datacenter]
|
2014-05-27 22:07:31 +00:00
|
|
|
for idx, e := range existing {
|
|
|
|
if e.Name == parts.Name {
|
|
|
|
existing[idx] = parts
|
2013-12-12 00:24:34 +00:00
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2015-07-29 23:33:25 +00:00
|
|
|
// Add to the list if not known.
|
2013-12-12 00:24:34 +00:00
|
|
|
if !found {
|
2014-05-27 22:07:31 +00:00
|
|
|
s.remoteConsuls[parts.Datacenter] = append(existing, parts)
|
2013-12-12 00:24:34 +00:00
|
|
|
}
|
|
|
|
s.remoteLock.Unlock()
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|
|
|
|
|
2014-06-18 23:15:28 +00:00
|
|
|
// maybeBootsrap is used to handle bootstrapping when a new consul server joins
|
|
|
|
func (s *Server) maybeBootstrap() {
|
|
|
|
index, err := s.raftStore.LastIndex()
|
|
|
|
if err != nil {
|
|
|
|
s.logger.Printf("[ERR] consul: failed to read last raft index: %v", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bootstrap can only be done if there are no committed logs,
|
|
|
|
// remove our expectations of bootstrapping
|
|
|
|
if index != 0 {
|
2014-06-20 00:08:48 +00:00
|
|
|
s.config.BootstrapExpect = 0
|
2014-06-18 23:15:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Scan for all the known servers
|
|
|
|
members := s.serfLAN.Members()
|
2015-05-08 18:35:12 +00:00
|
|
|
addrs := make([]string, 0)
|
2014-06-18 23:15:28 +00:00
|
|
|
for _, member := range members {
|
2016-03-30 00:39:19 +00:00
|
|
|
valid, p := agent.IsConsulServer(member)
|
2014-06-18 23:15:28 +00:00
|
|
|
if !valid {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if p.Datacenter != s.config.Datacenter {
|
|
|
|
s.logger.Printf("[ERR] consul: Member %v has a conflicting datacenter, ignoring", member)
|
|
|
|
continue
|
|
|
|
}
|
2014-06-20 00:08:48 +00:00
|
|
|
if p.Expect != 0 && p.Expect != s.config.BootstrapExpect {
|
2014-06-18 23:15:28 +00:00
|
|
|
s.logger.Printf("[ERR] consul: Member %v has a conflicting expect value. All nodes should expect the same number.", member)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if p.Bootstrap {
|
|
|
|
s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. Expect disabled.", member)
|
|
|
|
return
|
|
|
|
}
|
2015-05-08 18:35:12 +00:00
|
|
|
addr := &net.TCPAddr{IP: member.Addr, Port: p.Port}
|
|
|
|
addrs = append(addrs, addr.String())
|
2014-06-18 23:15:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Skip if we haven't met the minimum expect count
|
2014-06-20 00:08:48 +00:00
|
|
|
if len(addrs) < s.config.BootstrapExpect {
|
2014-06-18 23:15:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the peer set
|
|
|
|
s.logger.Printf("[INFO] consul: Attempting bootstrap with nodes: %v", addrs)
|
|
|
|
if err := s.raft.SetPeers(addrs).Error(); err != nil {
|
|
|
|
s.logger.Printf("[ERR] consul: failed to bootstrap peers: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-09-15 12:22:08 +00:00
|
|
|
// Bootstrapping complete, don't enter this again
|
2014-06-20 00:08:48 +00:00
|
|
|
s.config.BootstrapExpect = 0
|
2014-06-18 23:15:28 +00:00
|
|
|
}
|
|
|
|
|
2015-07-29 23:33:25 +00:00
|
|
|
// lanNodeFailed is used to handle fail events on the LAN pool.
|
|
|
|
func (s *Server) lanNodeFailed(me serf.MemberEvent) {
|
|
|
|
for _, m := range me.Members {
|
2016-03-30 00:39:19 +00:00
|
|
|
ok, parts := agent.IsConsulServer(m)
|
2015-07-29 23:33:25 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
s.logger.Printf("[INFO] consul: removing LAN server %s", parts)
|
|
|
|
|
|
|
|
s.localLock.Lock()
|
|
|
|
delete(s.localConsuls, parts.Addr.String())
|
|
|
|
s.localLock.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// wanNodeFailed is used to handle fail events on the WAN pool.
|
|
|
|
func (s *Server) wanNodeFailed(me serf.MemberEvent) {
|
2013-12-12 00:24:34 +00:00
|
|
|
for _, m := range me.Members {
|
2016-03-30 00:39:19 +00:00
|
|
|
ok, parts := agent.IsConsulServer(m)
|
2013-12-12 00:24:34 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
2015-07-29 23:33:25 +00:00
|
|
|
s.logger.Printf("[INFO] consul: removing WAN server %s", parts)
|
2013-12-12 00:24:34 +00:00
|
|
|
|
|
|
|
// Remove the server if known
|
|
|
|
s.remoteLock.Lock()
|
2014-01-20 23:39:07 +00:00
|
|
|
existing := s.remoteConsuls[parts.Datacenter]
|
2013-12-12 00:24:34 +00:00
|
|
|
n := len(existing)
|
|
|
|
for i := 0; i < n; i++ {
|
2014-05-27 22:07:31 +00:00
|
|
|
if existing[i].Name == parts.Name {
|
2013-12-12 00:24:34 +00:00
|
|
|
existing[i], existing[n-1] = existing[n-1], nil
|
|
|
|
existing = existing[:n-1]
|
|
|
|
n--
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
|
2013-12-12 00:24:34 +00:00
|
|
|
// Trim the list if all known consuls are dead
|
|
|
|
if n == 0 {
|
2014-01-20 23:39:07 +00:00
|
|
|
delete(s.remoteConsuls, parts.Datacenter)
|
2013-12-12 00:24:34 +00:00
|
|
|
} else {
|
2014-01-20 23:39:07 +00:00
|
|
|
s.remoteConsuls[parts.Datacenter] = existing
|
2013-12-12 00:24:34 +00:00
|
|
|
}
|
|
|
|
s.remoteLock.Unlock()
|
|
|
|
}
|
2013-12-09 23:29:44 +00:00
|
|
|
}
|