2015-06-01 15:49:10 +00:00
|
|
|
package nomad
|
|
|
|
|
|
|
|
import (
|
2015-06-03 10:26:50 +00:00
|
|
|
"crypto/tls"
|
|
|
|
"errors"
|
2015-06-01 15:49:10 +00:00
|
|
|
"fmt"
|
|
|
|
"log"
|
2015-06-03 10:26:50 +00:00
|
|
|
"net"
|
|
|
|
"net/rpc"
|
2015-06-01 15:49:10 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2015-06-03 10:26:50 +00:00
|
|
|
"reflect"
|
2015-11-24 21:15:01 +00:00
|
|
|
"sort"
|
2015-06-03 10:26:50 +00:00
|
|
|
"strconv"
|
2015-06-01 15:49:10 +00:00
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2015-06-03 10:26:50 +00:00
|
|
|
"github.com/hashicorp/consul/tlsutil"
|
2015-08-29 21:22:24 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/state"
|
2015-06-01 15:49:10 +00:00
|
|
|
"github.com/hashicorp/raft"
|
|
|
|
"github.com/hashicorp/raft-boltdb"
|
2015-06-03 10:58:00 +00:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2015-06-01 15:49:10 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// raftState is the sub-directory of the data dir holding the Raft
	// database, snapshots and peer file (see setupRaft). serfSnapshot is
	// the path, relative to the data dir, of the Serf snapshot file (see
	// setupSerf). snapshotsRetained is the retain count handed to the
	// Raft file snapshot store.
	raftState         = "raft/"
	serfSnapshot      = "serf/snapshot"
	snapshotsRetained = 2

	// serverRPCCache controls how long we keep an idle connection open to a server
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams controls how many idle streams we keep open to a server
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs to cache in-memory.
	// This is used to reduce disk I/O for the recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
	// to replicate to gracefully leave the cluster.
	raftRemoveGracePeriod = 5 * time.Second

	// apiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way that
	// would break clients for sane client versioning.
	apiMajorVersion = 1

	// apiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	apiMinorVersion = 1
)
|
|
|
|
|
|
|
|
// Server is a Nomad server which manages the job queues,
// schedulers, and notification bus for agents.
type Server struct {
	// config is the configuration passed to NewServer; logger writes to
	// config.LogOutput.
	config *Config
	logger *log.Logger

	// Connection pool to other Nomad servers
	connPool *ConnPool

	// Endpoints holds our RPC endpoints
	endpoints endpoints

	// The raft instance is used among Nomad nodes within the
	// region to protect operations that require strong consistency.
	// leaderCh is installed as the Raft NotifyCh in setupRaft.
	// raftStore is only set when running with on-disk state and
	// raftInmem only in DevMode (see setupRaft).
	leaderCh      <-chan bool
	raft          *raft.Raft
	raftLayer     *RaftLayer
	raftPeers     raft.PeerStore
	raftStore     *raftboltdb.BoltStore
	raftInmem     *raft.InmemStore
	raftTransport *raft.NetworkTransport

	// fsm is the state machine used with Raft
	fsm *nomadFSM

	// rpcListener is used to listen for incoming connections.
	// rpcServer hosts the registered endpoints and rpcAdvertise is the
	// address handed to the Raft layer and published in the Serf tags.
	rpcListener  net.Listener
	rpcServer    *rpc.Server
	rpcAdvertise net.Addr

	// rpcTLS is the TLS config for incoming TLS requests
	rpcTLS *tls.Config

	// peers is used to track the known Nomad servers. This is
	// used for region forwarding and clustering. peers is keyed by
	// region (see Regions); peerLock must be held when accessing it.
	// NOTE(review): the key of localPeers is not visible in this file.
	peers      map[string][]*serverParts
	localPeers map[string]*serverParts
	peerLock   sync.RWMutex

	// serf is the Serf cluster containing only Nomad
	// servers. This is used for multi-region federation
	// and automatic clustering within regions.
	serf *serf.Serf

	// reconcileCh is used to pass events from the serf handler
	// into the leader manager. Mostly used to handle when servers
	// join/leave from the region.
	reconcileCh chan serf.Member

	// eventCh is used to receive events from the serf cluster
	eventCh chan serf.Event

	// evalBroker is used to manage the in-progress evaluations
	// that are waiting to be brokered to a sub-scheduler
	evalBroker *EvalBroker

	// blockedEvals is used to manage evaluations that are blocked on node
	// capacity changes.
	blockedEvals *BlockedEvals

	// planQueue is used to manage the submitted allocation
	// plans that are waiting to be assessed by the leader
	planQueue *PlanQueue

	// periodicDispatcher is used to track and create evaluations for periodic jobs.
	periodicDispatcher *PeriodicDispatch

	// heartbeatTimers track the expiration time of each heartbeat that has
	// a TTL. On expiration, the node status is updated to be 'down'.
	heartbeatTimers     map[string]*time.Timer
	heartbeatTimersLock sync.Mutex

	// Worker used for processing
	workers []*Worker

	// left is set once Leave has been called. shutdown and shutdownCh are
	// set/closed by Shutdown while holding shutdownLock; shutdownCh is the
	// signal observed by IsShutdown and the stats emitters.
	left         bool
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
}
|
|
|
|
|
2015-06-03 10:26:50 +00:00
|
|
|
// endpoints holds the RPC endpoints. Each field is populated and
// registered with the RPC server in setupRPC.
type endpoints struct {
	Status   *Status
	Node     *Node
	Job      *Job
	Eval     *Eval
	Plan     *Plan
	Alloc    *Alloc
	Region   *Region
	Periodic *Periodic
}
|
|
|
|
|
2015-06-01 15:49:10 +00:00
|
|
|
// NewServer is used to construct a new Nomad server from the
// configuration, potentially returning an error.
//
// The passed config is mutated: LogOutput defaults to os.Stderr when nil,
// and the setup helpers write into its Raft and Serf sub-configurations.
// Sub-systems are brought up in a fixed order (RPC, Raft, Serf, workers);
// if any of them fails the partially built server is shut down before the
// error is returned. On success the background goroutines are running.
func NewServer(config *Config) (*Server, error) {
	// Check the protocol version
	if err := config.CheckVersion(); err != nil {
		return nil, err
	}

	// Ensure we have a log output
	if config.LogOutput == nil {
		config.LogOutput = os.Stderr
	}

	// Create a logger
	logger := log.New(config.LogOutput, "", log.LstdFlags)

	// Create an eval broker
	evalBroker, err := NewEvalBroker(config.EvalNackTimeout, config.EvalDeliveryLimit)
	if err != nil {
		return nil, err
	}

	// Create a new blocked eval tracker.
	blockedEvals := NewBlockedEvals(evalBroker)

	// Create a plan queue
	planQueue, err := NewPlanQueue()
	if err != nil {
		return nil, err
	}

	// Create the server
	s := &Server{
		config:       config,
		connPool:     NewPool(config.LogOutput, serverRPCCache, serverMaxStreams, nil),
		logger:       logger,
		rpcServer:    rpc.NewServer(),
		peers:        make(map[string][]*serverParts),
		localPeers:   make(map[string]*serverParts),
		reconcileCh:  make(chan serf.Member, 32),
		eventCh:      make(chan serf.Event, 256),
		evalBroker:   evalBroker,
		blockedEvals: blockedEvals,
		planQueue:    planQueue,
		shutdownCh:   make(chan struct{}),
	}

	// Create the periodic dispatcher for launching periodic jobs.
	// This must exist before setupRaft, which hands it to the FSM.
	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)

	// Initialize the RPC layer. This also creates the Raft stream layer
	// that setupRaft builds its transport on.
	// TODO: TLS...
	if err := s.setupRPC(nil); err != nil {
		s.Shutdown()
		logger.Printf("[ERR] nomad: failed to start RPC layer: %s", err)
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize the Raft server
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		logger.Printf("[ERR] nomad: failed to start Raft: %s", err)
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	// Initialize the wan Serf
	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
	if err != nil {
		s.Shutdown()
		logger.Printf("[ERR] nomad: failed to start serf WAN: %s", err)
		return nil, fmt.Errorf("Failed to start serf: %v", err)
	}

	// Initialize the scheduling workers
	if err := s.setupWorkers(); err != nil {
		s.Shutdown()
		logger.Printf("[ERR] nomad: failed to start workers: %s", err)
		return nil, fmt.Errorf("Failed to start workers: %v", err)
	}

	// Monitor leadership changes
	go s.monitorLeadership()

	// Start ingesting events for Serf
	go s.serfEventHandler()

	// Start the RPC listeners
	go s.listen()

	// Emit metrics for the eval broker
	go evalBroker.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the plan queue
	go planQueue.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the blocked eval tracker.
	go blockedEvals.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics
	go s.heartbeatStats()

	// Done
	return s, nil
}
|
|
|
|
|
|
|
|
// Shutdown is used to shutdown the server. It is safe to call more than
// once: calls after the first return nil without doing anything. The
// returned error is always nil; a failure to stop Raft is only logged.
func (s *Server) Shutdown() error {
	s.logger.Printf("[INFO] nomad: shutting down server")
	s.shutdownLock.Lock()
	defer s.shutdownLock.Unlock()

	// Already shut down, nothing left to do
	if s.shutdown {
		return nil
	}

	// Closing shutdownCh is what IsShutdown and the goroutines that were
	// handed the channel observe.
	s.shutdown = true
	close(s.shutdownCh)

	// Each sub-system may be nil when Shutdown is invoked from a failed
	// NewServer, hence the guards below.
	if s.serf != nil {
		s.serf.Shutdown()
	}

	if s.raft != nil {
		s.raftTransport.Close()
		s.raftLayer.Close()
		future := s.raft.Shutdown()
		if err := future.Error(); err != nil {
			s.logger.Printf("[WARN] nomad: Error shutting down raft: %s", err)
		}
		// raftStore is only set when running with on-disk state
		if s.raftStore != nil {
			s.raftStore.Close()
		}
	}

	// Shutdown the RPC listener
	if s.rpcListener != nil {
		s.rpcListener.Close()
	}

	// Close the connection pool
	s.connPool.Shutdown()

	// Close the fsm
	if s.fsm != nil {
		s.fsm.Close()
	}
	return nil
}
|
|
|
|
|
2015-07-28 22:12:08 +00:00
|
|
|
// IsShutdown checks if the server is shutdown
|
|
|
|
func (s *Server) IsShutdown() bool {
|
|
|
|
select {
|
|
|
|
case <-s.shutdownCh:
|
|
|
|
return true
|
|
|
|
default:
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-03 11:25:50 +00:00
|
|
|
// Leave is used to prepare for a graceful shutdown of the server
|
|
|
|
func (s *Server) Leave() error {
|
|
|
|
s.logger.Printf("[INFO] nomad: server starting leave")
|
|
|
|
s.left = true
|
|
|
|
|
|
|
|
// Check the number of known peers
|
|
|
|
numPeers, err := s.numOtherPeers()
|
|
|
|
if err != nil {
|
|
|
|
s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we are the current leader, and we have any other peers (cluster has multiple
|
|
|
|
// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
|
|
|
|
// not the leader, then we should issue our leave intention and wait to be removed
|
|
|
|
// for some sane period of time.
|
|
|
|
isLeader := s.IsLeader()
|
|
|
|
if isLeader && numPeers > 0 {
|
|
|
|
future := s.raft.RemovePeer(s.raftTransport.LocalAddr())
|
|
|
|
if err := future.Error(); err != nil && err != raft.ErrUnknownPeer {
|
|
|
|
s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Leave the gossip pool
|
|
|
|
if s.serf != nil {
|
|
|
|
if err := s.serf.Leave(); err != nil {
|
|
|
|
s.logger.Printf("[ERR] nomad: failed to leave Serf cluster: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we were not leader, wait to be safely removed from the cluster.
|
|
|
|
// We must wait to allow the raft replication to take place, otherwise
|
|
|
|
// an immediate shutdown could cause a loss of quorum.
|
|
|
|
if !isLeader {
|
|
|
|
limit := time.Now().Add(raftRemoveGracePeriod)
|
|
|
|
for numPeers > 0 && time.Now().Before(limit) {
|
|
|
|
// Update the number of peers
|
|
|
|
numPeers, err = s.numOtherPeers()
|
|
|
|
if err != nil {
|
|
|
|
s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// Avoid the sleep if we are done
|
|
|
|
if numPeers == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sleep a while and check again
|
|
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
}
|
|
|
|
if numPeers != 0 {
|
|
|
|
s.logger.Printf("[WARN] nomad: failed to leave raft peer set gracefully, timeout")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-06-03 10:26:50 +00:00
|
|
|
// setupRPC is used to setup the RPC listener
|
|
|
|
func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
|
|
|
|
// Create endpoints
|
|
|
|
s.endpoints.Status = &Status{s}
|
2015-09-07 03:31:32 +00:00
|
|
|
s.endpoints.Node = &Node{s}
|
2015-07-23 21:41:18 +00:00
|
|
|
s.endpoints.Job = &Job{s}
|
2015-07-23 23:00:19 +00:00
|
|
|
s.endpoints.Eval = &Eval{s}
|
2015-07-27 22:31:49 +00:00
|
|
|
s.endpoints.Plan = &Plan{s}
|
2015-09-06 22:34:28 +00:00
|
|
|
s.endpoints.Alloc = &Alloc{s}
|
2015-11-24 05:47:11 +00:00
|
|
|
s.endpoints.Region = &Region{s}
|
2016-01-13 18:19:53 +00:00
|
|
|
s.endpoints.Periodic = &Periodic{s}
|
2015-06-03 10:26:50 +00:00
|
|
|
|
|
|
|
// Register the handlers
|
|
|
|
s.rpcServer.Register(s.endpoints.Status)
|
2015-09-07 03:31:32 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Node)
|
2015-07-23 21:41:18 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Job)
|
2015-07-23 23:00:19 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Eval)
|
2015-07-27 22:31:49 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Plan)
|
2015-09-06 22:34:28 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Alloc)
|
2015-11-24 05:47:11 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Region)
|
2016-01-13 18:19:53 +00:00
|
|
|
s.rpcServer.Register(s.endpoints.Periodic)
|
2015-06-03 10:26:50 +00:00
|
|
|
|
|
|
|
list, err := net.ListenTCP("tcp", s.config.RPCAddr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
s.rpcListener = list
|
|
|
|
|
|
|
|
if s.config.RPCAdvertise != nil {
|
2015-06-03 10:58:00 +00:00
|
|
|
s.rpcAdvertise = s.config.RPCAdvertise
|
2015-06-03 10:26:50 +00:00
|
|
|
} else {
|
2015-06-03 10:58:00 +00:00
|
|
|
s.rpcAdvertise = s.rpcListener.Addr()
|
2015-06-03 10:26:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Verify that we have a usable advertise address
|
2015-06-03 10:58:00 +00:00
|
|
|
addr, ok := s.rpcAdvertise.(*net.TCPAddr)
|
2015-06-03 10:26:50 +00:00
|
|
|
if !ok {
|
|
|
|
list.Close()
|
|
|
|
return fmt.Errorf("RPC advertise address is not a TCP Address: %v", addr)
|
|
|
|
}
|
|
|
|
if addr.IP.IsUnspecified() {
|
|
|
|
list.Close()
|
|
|
|
return fmt.Errorf("RPC advertise address is not advertisable: %v", addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Provide a DC specific wrapper. Raft replication is only
|
|
|
|
// ever done in the same datacenter, so we can provide it as a constant.
|
|
|
|
// wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)
|
|
|
|
// TODO: TLS...
|
2015-06-03 10:58:00 +00:00
|
|
|
s.raftLayer = NewRaftLayer(s.rpcAdvertise, nil)
|
2015-06-03 10:26:50 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-06-01 15:49:10 +00:00
|
|
|
// setupRaft is used to setup and initialize Raft
|
|
|
|
func (s *Server) setupRaft() error {
|
|
|
|
// If we are in bootstrap mode, enable a single node cluster
|
2015-06-05 21:54:45 +00:00
|
|
|
if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
|
2015-06-01 15:49:10 +00:00
|
|
|
s.config.RaftConfig.EnableSingleNode = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create the FSM
|
|
|
|
var err error
|
2016-01-29 23:31:32 +00:00
|
|
|
s.fsm, err = NewFSM(s.evalBroker, s.periodicDispatcher, s.blockedEvals, s.config.LogOutput)
|
2015-06-01 15:49:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-06-01 19:11:40 +00:00
|
|
|
// Create a transport layer
|
2015-08-26 00:36:52 +00:00
|
|
|
trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
|
2015-06-01 19:11:40 +00:00
|
|
|
s.config.LogOutput)
|
|
|
|
s.raftTransport = trans
|
2015-06-01 15:49:10 +00:00
|
|
|
|
|
|
|
// Create the backend raft store for logs and stable storage
|
2015-06-01 19:11:40 +00:00
|
|
|
var log raft.LogStore
|
|
|
|
var stable raft.StableStore
|
|
|
|
var snap raft.SnapshotStore
|
|
|
|
var peers raft.PeerStore
|
|
|
|
if s.config.DevMode {
|
|
|
|
store := raft.NewInmemStore()
|
|
|
|
s.raftInmem = store
|
|
|
|
stable = store
|
|
|
|
log = store
|
|
|
|
snap = raft.NewDiscardSnapshotStore()
|
|
|
|
peers = &raft.StaticPeers{}
|
2015-06-04 11:02:39 +00:00
|
|
|
s.raftPeers = peers
|
2015-06-01 19:11:40 +00:00
|
|
|
|
|
|
|
} else {
|
|
|
|
// Create the base raft path
|
|
|
|
path := filepath.Join(s.config.DataDir, raftState)
|
|
|
|
if err := ensurePath(path, true); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-06-01 15:49:10 +00:00
|
|
|
|
2015-06-01 19:11:40 +00:00
|
|
|
// Create the BoltDB backend
|
|
|
|
store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
s.raftStore = store
|
|
|
|
stable = store
|
2015-06-01 15:49:10 +00:00
|
|
|
|
2015-06-01 19:11:40 +00:00
|
|
|
// Wrap the store in a LogCache to improve performance
|
|
|
|
cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
|
|
|
|
if err != nil {
|
|
|
|
store.Close()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
log = cacheStore
|
2015-06-01 15:49:10 +00:00
|
|
|
|
2015-06-01 19:11:40 +00:00
|
|
|
// Create the snapshot store
|
|
|
|
snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
|
|
|
|
if err != nil {
|
|
|
|
if s.raftStore != nil {
|
|
|
|
s.raftStore.Close()
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
snap = snapshots
|
2015-06-01 15:49:10 +00:00
|
|
|
|
2015-06-01 19:11:40 +00:00
|
|
|
// Setup the peer store
|
|
|
|
s.raftPeers = raft.NewJSONPeers(path, trans)
|
|
|
|
peers = s.raftPeers
|
|
|
|
}
|
2015-06-01 15:49:10 +00:00
|
|
|
|
|
|
|
// Ensure local host is always included if we are in bootstrap mode
|
2015-06-01 19:11:40 +00:00
|
|
|
if s.config.RaftConfig.EnableSingleNode {
|
|
|
|
p, err := peers.Peers()
|
2015-06-01 15:49:10 +00:00
|
|
|
if err != nil {
|
2015-06-01 19:11:40 +00:00
|
|
|
if s.raftStore != nil {
|
|
|
|
s.raftStore.Close()
|
|
|
|
}
|
2015-06-01 15:49:10 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-06-01 19:11:40 +00:00
|
|
|
if !raft.PeerContained(p, trans.LocalAddr()) {
|
|
|
|
peers.SetPeers(raft.AddUniquePeer(p, trans.LocalAddr()))
|
2015-06-01 15:49:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure we set the LogOutput
|
|
|
|
s.config.RaftConfig.LogOutput = s.config.LogOutput
|
|
|
|
|
2015-09-07 17:46:41 +00:00
|
|
|
// Setup the leader channel
|
|
|
|
leaderCh := make(chan bool, 1)
|
|
|
|
s.config.RaftConfig.NotifyCh = leaderCh
|
|
|
|
s.leaderCh = leaderCh
|
|
|
|
|
2015-06-01 15:49:10 +00:00
|
|
|
// Setup the Raft store
|
2015-06-01 19:11:40 +00:00
|
|
|
s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable,
|
|
|
|
snap, peers, trans)
|
2015-06-01 15:49:10 +00:00
|
|
|
if err != nil {
|
2015-06-01 19:11:40 +00:00
|
|
|
if s.raftStore != nil {
|
|
|
|
s.raftStore.Close()
|
|
|
|
}
|
2015-06-01 15:49:10 +00:00
|
|
|
trans.Close()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2015-06-03 10:26:50 +00:00
|
|
|
|
2015-06-03 10:58:00 +00:00
|
|
|
// setupSerf is used to setup and initialize a Serf
|
|
|
|
func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
|
|
|
|
conf.Init()
|
|
|
|
conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
|
|
|
|
conf.Tags["role"] = "nomad"
|
|
|
|
conf.Tags["region"] = s.config.Region
|
|
|
|
conf.Tags["dc"] = s.config.Datacenter
|
|
|
|
conf.Tags["vsn"] = fmt.Sprintf("%d", s.config.ProtocolVersion)
|
|
|
|
conf.Tags["vsn_min"] = fmt.Sprintf("%d", ProtocolVersionMin)
|
|
|
|
conf.Tags["vsn_max"] = fmt.Sprintf("%d", ProtocolVersionMax)
|
|
|
|
conf.Tags["build"] = s.config.Build
|
2015-06-03 11:35:48 +00:00
|
|
|
conf.Tags["port"] = fmt.Sprintf("%d", s.rpcAdvertise.(*net.TCPAddr).Port)
|
2015-09-07 17:46:41 +00:00
|
|
|
if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
|
2015-06-03 10:58:00 +00:00
|
|
|
conf.Tags["bootstrap"] = "1"
|
|
|
|
}
|
|
|
|
if s.config.BootstrapExpect != 0 {
|
|
|
|
conf.Tags["expect"] = fmt.Sprintf("%d", s.config.BootstrapExpect)
|
|
|
|
}
|
|
|
|
conf.MemberlistConfig.LogOutput = s.config.LogOutput
|
|
|
|
conf.LogOutput = s.config.LogOutput
|
|
|
|
conf.EventCh = ch
|
2015-06-04 10:33:12 +00:00
|
|
|
if !s.config.DevMode {
|
|
|
|
conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
|
|
|
|
if err := ensurePath(conf.SnapshotPath, false); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
2015-06-03 10:58:00 +00:00
|
|
|
conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
|
|
|
|
conf.RejoinAfterLeave = true
|
|
|
|
conf.Merge = &serfMergeDelegate{}
|
|
|
|
|
|
|
|
// Until Nomad supports this fully, we disable automatic resolution.
|
|
|
|
// When enabled, the Serf gossip may just turn off if we are the minority
|
|
|
|
// node which is rather unexpected.
|
|
|
|
conf.EnableNameConflictResolution = false
|
|
|
|
return serf.Create(conf)
|
|
|
|
}
|
|
|
|
|
2015-07-28 22:12:08 +00:00
|
|
|
// setupWorkers is used to start the scheduling workers
|
|
|
|
func (s *Server) setupWorkers() error {
|
|
|
|
// Check if all the schedulers are disabled
|
|
|
|
if len(s.config.EnabledSchedulers) == 0 || s.config.NumSchedulers == 0 {
|
|
|
|
s.logger.Printf("[WARN] nomad: no enabled schedulers")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Start the workers
|
|
|
|
for i := 0; i < s.config.NumSchedulers; i++ {
|
2015-08-23 17:53:53 +00:00
|
|
|
if w, err := NewWorker(s); err != nil {
|
2015-07-28 22:12:08 +00:00
|
|
|
return err
|
2015-08-23 17:53:53 +00:00
|
|
|
} else {
|
|
|
|
s.workers = append(s.workers, w)
|
2015-07-28 22:12:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
s.logger.Printf("[INFO] nomad: starting %d scheduling worker(s) for %v",
|
|
|
|
s.config.NumSchedulers, s.config.EnabledSchedulers)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-06-03 11:25:50 +00:00
|
|
|
// numOtherPeers is used to check on the number of known peers
|
|
|
|
// excluding the local ndoe
|
|
|
|
func (s *Server) numOtherPeers() (int, error) {
|
|
|
|
peers, err := s.raftPeers.Peers()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
otherPeers := raft.ExcludePeer(peers, s.raftTransport.LocalAddr())
|
|
|
|
return len(otherPeers), nil
|
|
|
|
}
|
|
|
|
|
2015-06-03 10:26:50 +00:00
|
|
|
// IsLeader checks if this server is the cluster leader
|
|
|
|
func (s *Server) IsLeader() bool {
|
|
|
|
return s.raft.State() == raft.Leader
|
|
|
|
}
|
|
|
|
|
2015-06-03 11:25:50 +00:00
|
|
|
// Join is used to have Nomad join the gossip ring
|
|
|
|
// The target address should be another node listening on the
|
|
|
|
// Serf address
|
|
|
|
func (s *Server) Join(addrs []string) (int, error) {
|
|
|
|
return s.serf.Join(addrs, true)
|
|
|
|
}
|
|
|
|
|
|
|
|
// LocalMember is used to return the local node
|
|
|
|
func (c *Server) LocalMember() serf.Member {
|
|
|
|
return c.serf.LocalMember()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Members is used to return the members of the serf cluster
|
|
|
|
func (s *Server) Members() []serf.Member {
|
|
|
|
return s.serf.Members()
|
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveFailedNode is used to remove a failed node from the cluster
|
|
|
|
func (s *Server) RemoveFailedNode(node string) error {
|
|
|
|
return s.serf.RemoveFailedNode(node)
|
|
|
|
}
|
|
|
|
|
|
|
|
// KeyManager returns the Serf keyring manager
|
|
|
|
func (s *Server) KeyManager() *serf.KeyManager {
|
|
|
|
return s.serf.KeyManager()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Encrypted determines if gossip is encrypted
|
|
|
|
func (s *Server) Encrypted() bool {
|
|
|
|
return s.serf.EncryptionEnabled()
|
|
|
|
}
|
|
|
|
|
2015-08-29 21:22:24 +00:00
|
|
|
// State returns the underlying state store. This should *not*
|
|
|
|
// be used to modify state directly.
|
|
|
|
func (s *Server) State() *state.StateStore {
|
|
|
|
return s.fsm.State()
|
|
|
|
}
|
|
|
|
|
2015-11-24 05:47:11 +00:00
|
|
|
// Regions returns the known regions in the cluster.
|
|
|
|
func (s *Server) Regions() []string {
|
2015-11-24 05:49:03 +00:00
|
|
|
s.peerLock.RLock()
|
|
|
|
defer s.peerLock.RUnlock()
|
2015-11-24 05:47:11 +00:00
|
|
|
|
|
|
|
regions := make([]string, 0, len(s.peers))
|
|
|
|
for region, _ := range s.peers {
|
|
|
|
regions = append(regions, region)
|
|
|
|
}
|
2015-11-24 21:15:01 +00:00
|
|
|
sort.Strings(regions)
|
2015-11-24 05:47:11 +00:00
|
|
|
return regions
|
|
|
|
}
|
|
|
|
|
2015-06-03 10:26:50 +00:00
|
|
|
// inmemCodec is used to do an RPC call without going over a network.
// It serves exactly one request: method and args describe the call,
// reply is where the response is copied to, and err records an error
// reported by the RPC server.
type inmemCodec struct {
	method string
	args   interface{}
	reply  interface{}
	err    error
}

// ReadRequestHeader fills in the method to call; there is nothing to
// decode, so it never fails.
func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
	req.ServiceMethod = i.method
	return nil
}

// ReadRequestBody copies the caller's arguments into args, the value the
// RPC server allocated for the method. Up to two levels of pointer are
// dereferenced on both sides.
//
// The RPC server passes a nil args to discard the body of a request it
// cannot route (unknown service or method). There is nothing to discard
// here, and copying into a nil destination would panic, so that case is
// a no-op.
func (i *inmemCodec) ReadRequestBody(args interface{}) error {
	if args == nil {
		return nil
	}
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
	dst.Set(sourceValue)
	return nil
}

// WriteResponse delivers the outcome of the call. An error reported by
// the RPC server is stored in i.err and the reply is left untouched;
// otherwise the reply value is copied into the caller's reply.
func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
	if resp.Error != "" {
		i.err = errors.New(resp.Error)
		return nil
	}
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
	dst.Set(sourceValue)
	return nil
}

// Close is a no-op, there is no connection to release.
func (i *inmemCodec) Close() error {
	return nil
}
|
|
|
|
|
|
|
|
// RPC is used to make a local RPC call
|
|
|
|
func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
|
|
|
|
codec := &inmemCodec{
|
|
|
|
method: method,
|
|
|
|
args: args,
|
|
|
|
reply: reply,
|
|
|
|
}
|
|
|
|
if err := s.rpcServer.ServeRequest(codec); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return codec.err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stats is used to return statistics for debugging and insight
|
|
|
|
// for various sub-systems
|
|
|
|
func (s *Server) Stats() map[string]map[string]string {
|
|
|
|
toString := func(v uint64) string {
|
|
|
|
return strconv.FormatUint(v, 10)
|
|
|
|
}
|
|
|
|
stats := map[string]map[string]string{
|
|
|
|
"nomad": map[string]string{
|
|
|
|
"server": "true",
|
|
|
|
"leader": fmt.Sprintf("%v", s.IsLeader()),
|
|
|
|
"bootstrap": fmt.Sprintf("%v", s.config.Bootstrap),
|
2015-08-17 00:40:35 +00:00
|
|
|
"known_regions": toString(uint64(len(s.peers))),
|
2015-06-03 10:26:50 +00:00
|
|
|
},
|
|
|
|
"raft": s.raft.Stats(),
|
2015-06-03 11:08:04 +00:00
|
|
|
"serf": s.serf.Stats(),
|
2015-08-20 22:29:30 +00:00
|
|
|
"runtime": RuntimeStats(),
|
2015-06-03 10:26:50 +00:00
|
|
|
}
|
|
|
|
return stats
|
|
|
|
}
|