2015-06-01 15:49:10 +00:00
package nomad
import (
2017-11-20 15:38:46 +00:00
"context"
2015-06-03 10:26:50 +00:00
"crypto/tls"
2020-01-28 17:58:38 +00:00
"crypto/x509"
"errors"
2015-06-01 15:49:10 +00:00
"fmt"
2017-02-02 23:31:36 +00:00
"io/ioutil"
2015-06-03 10:26:50 +00:00
"net"
"net/rpc"
2017-02-02 23:31:36 +00:00
"os"
2015-06-01 15:49:10 +00:00
"path/filepath"
2015-11-24 21:15:01 +00:00
"sort"
2015-06-03 10:26:50 +00:00
"strconv"
2020-01-28 17:58:38 +00:00
"strings"
2015-06-01 15:49:10 +00:00
"sync"
2016-06-16 19:00:15 +00:00
"sync/atomic"
2015-06-01 15:49:10 +00:00
"time"
2019-06-14 21:30:27 +00:00
"github.com/armon/go-metrics"
2018-11-07 18:08:23 +00:00
"github.com/hashicorp/consul/agent/consul/autopilot"
2016-06-14 05:58:39 +00:00
consulapi "github.com/hashicorp/consul/api"
2018-11-07 18:08:23 +00:00
"github.com/hashicorp/consul/lib"
2018-09-15 23:23:13 +00:00
log "github.com/hashicorp/go-hclog"
2017-11-15 01:53:23 +00:00
multierror "github.com/hashicorp/go-multierror"
2017-08-19 23:49:53 +00:00
lru "github.com/hashicorp/golang-lru"
2016-06-14 05:58:39 +00:00
"github.com/hashicorp/nomad/command/agent/consul"
2018-01-11 18:17:23 +00:00
"github.com/hashicorp/nomad/helper/codec"
2018-01-12 21:58:44 +00:00
"github.com/hashicorp/nomad/helper/pool"
"github.com/hashicorp/nomad/helper/stats"
2016-10-25 23:05:37 +00:00
"github.com/hashicorp/nomad/helper/tlsutil"
2017-06-28 22:35:52 +00:00
"github.com/hashicorp/nomad/nomad/deploymentwatcher"
2018-03-08 23:08:23 +00:00
"github.com/hashicorp/nomad/nomad/drainer"
2015-08-29 21:22:24 +00:00
"github.com/hashicorp/nomad/nomad/state"
2016-05-28 01:14:34 +00:00
"github.com/hashicorp/nomad/nomad/structs"
2017-11-20 15:38:46 +00:00
"github.com/hashicorp/nomad/nomad/structs/config"
2020-04-30 13:13:00 +00:00
"github.com/hashicorp/nomad/nomad/volumewatcher"
2018-03-14 17:53:27 +00:00
"github.com/hashicorp/nomad/scheduler"
2015-06-01 15:49:10 +00:00
"github.com/hashicorp/raft"
2018-11-07 18:08:23 +00:00
raftboltdb "github.com/hashicorp/raft-boltdb"
2015-06-03 10:58:00 +00:00
"github.com/hashicorp/serf/serf"
2015-06-01 15:49:10 +00:00
)
const (
	// datacenterQueryLimit caps the number of datacenters a Nomad Server
	// will query while looking for bootstrap_expect servers.
	datacenterQueryLimit = 25

	// maxStaleLeadership is the longest this Nomad Server is permitted to
	// go without seeing a valid Raft leader.
	maxStaleLeadership = 15 * time.Second

	// peersPollInterval is the polling interval between attempts to query
	// Consul for Nomad Servers.
	peersPollInterval = 45 * time.Second

	// peersPollJitterFactor provides a slight amount of variance to the
	// retry interval when querying Consul Servers.
	peersPollJitterFactor = 2

	// raftState, serfSnapshot and snapshotsRetained name on-disk state.
	// serfSnapshot is handed to setupSerf by NewServer; the other two are
	// consumed outside of this section of the file.
	raftState         = "raft/"
	serfSnapshot      = "serf/snapshot"
	snapshotsRetained = 2

	// serverRPCCache is how long an idle connection to a server is kept
	// open.
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams is how many idle streams are kept open to a server.
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs cached in memory. The
	// cache reduces disk I/O for recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long to wait for a RemovePeer to
	// replicate so the server can leave the cluster gracefully.
	raftRemoveGracePeriod = 5 * time.Second

	// defaultConsulDiscoveryInterval is how often Consul is polled for new
	// servers while there is no leader.
	defaultConsulDiscoveryInterval time.Duration = 3 * time.Second

	// defaultConsulDiscoveryIntervalRetry is how often Consul is polled for
	// new servers while there is no leader and the last Consul query
	// failed.
	defaultConsulDiscoveryIntervalRetry time.Duration = 9 * time.Second

	// aclCacheSize is the number of ACL objects kept cached. ACLs have a
	// parsing and construction cost, so hot objects are cached to reduce
	// ACL token resolution time.
	aclCacheSize = 512
)
// Server is Nomad server which manages the job queues,
// schedulers, and notification bus for agents.
type Server struct {
config * Config
2017-12-07 17:07:00 +00:00
2019-10-15 19:14:25 +00:00
logger log . InterceptLogger
2015-06-01 15:49:10 +00:00
2015-06-07 18:50:29 +00:00
// Connection pool to other Nomad servers
2018-01-12 21:58:44 +00:00
connPool * pool . ConnPool
2015-06-07 18:50:29 +00:00
2015-06-03 11:25:50 +00:00
// The raft instance is used among Nomad nodes within the
// region to protect operations that require strong consistency
2015-06-01 15:49:10 +00:00
raft * raft . Raft
raftLayer * RaftLayer
raftStore * raftboltdb . BoltStore
2015-06-01 19:11:40 +00:00
raftInmem * raft . InmemStore
2015-06-01 15:49:10 +00:00
raftTransport * raft . NetworkTransport
2020-06-09 16:00:09 +00:00
// reassertLeaderCh is used to signal that the leader loop must
// re-establish leadership.
//
// This might be relevant in snapshot restores, where leader in-memory
// state changed significantly such that leader state (e.g. periodic
// jobs, eval brokers) need to be recomputed.
2020-06-04 22:11:17 +00:00
reassertLeaderCh chan chan error
2017-12-18 21:16:23 +00:00
// autopilot is the Autopilot instance for this server.
autopilot * autopilot . Autopilot
2015-06-01 15:49:10 +00:00
// fsm is the state machine used with Raft
fsm * nomadFSM
2015-06-03 10:26:50 +00:00
// rpcListener is used to listen for incoming connections
2017-12-07 17:07:00 +00:00
rpcListener net . Listener
listenerCh chan struct { }
2017-11-28 17:33:46 +00:00
2018-02-15 23:03:12 +00:00
// tlsWrap is used to wrap outbound connections using TLS. It should be
// accessed using the lock.
tlsWrap tlsutil . RegionWrapper
tlsWrapLock sync . RWMutex
2018-09-15 23:23:13 +00:00
// TODO(alex,hclog): Can I move more into the handler?
// rpcHandler is used to serve and handle RPCs
* rpcHandler
2018-01-03 22:59:52 +00:00
// rpcServer is the static RPC server that is used by the local agent.
rpcServer * rpc . Server
2018-03-16 22:24:49 +00:00
// clientRpcAdvertise is the advertised RPC address for Nomad clients to connect
// to this server
clientRpcAdvertise net . Addr
// serverRpcAdvertise is the advertised RPC address for Nomad servers to connect
// to this server
serverRpcAdvertise net . Addr
2015-06-03 10:26:50 +00:00
// rpcTLS is the TLS config for incoming TLS requests
2017-12-07 17:07:00 +00:00
rpcTLS * tls . Config
rpcCancel context . CancelFunc
2015-06-03 10:26:50 +00:00
2018-01-03 22:59:52 +00:00
// staticEndpoints is the set of static endpoints that can be reused across
// all RPC connections
staticEndpoints endpoints
2018-02-05 21:32:39 +00:00
// streamingRpcs is the registry holding our streaming RPC handlers.
2018-03-11 18:41:13 +00:00
streamingRpcs * structs . StreamingRpcRegistry
2018-01-19 00:51:49 +00:00
2018-01-05 21:50:04 +00:00
// nodeConns is the set of multiplexed node connections we have keyed by
// NodeID
2018-04-26 20:22:09 +00:00
nodeConns map [ string ] [ ] * nodeConnState
2018-01-05 21:50:04 +00:00
nodeConnsLock sync . RWMutex
2015-06-04 10:33:12 +00:00
// peers is used to track the known Nomad servers. This is
// used for region forwarding and clustering.
2015-06-07 18:32:01 +00:00
peers map [ string ] [ ] * serverParts
2017-02-02 23:49:06 +00:00
localPeers map [ raft . ServerAddress ] * serverParts
2015-06-07 18:32:01 +00:00
peerLock sync . RWMutex
2015-06-04 10:33:12 +00:00
2015-06-03 10:58:00 +00:00
// serf is the Serf cluster containing only Nomad
// servers. This is used for multi-region federation
// and automatic clustering within regions.
serf * serf . Serf
2015-06-04 10:42:56 +00:00
// reconcileCh is used to pass events from the serf handler
// into the leader manager. Mostly used to handle when servers
// join/leave from the region.
reconcileCh chan serf . Member
2019-07-02 07:58:02 +00:00
// used to track when the server is ready to serve consistent reads, updated atomically
readyForConsistentReads int32
2015-06-03 10:58:00 +00:00
// eventCh is used to receive events from the serf cluster
eventCh chan serf . Event
2017-06-28 22:35:52 +00:00
// BlockedEvals is used to manage evaluations that are blocked on node
// capacity changes.
blockedEvals * BlockedEvals
// deploymentWatcher is used to watch deployments and their allocations and
2017-08-07 21:13:05 +00:00
// make the required calls to continue to transition the deployment.
2017-06-28 22:35:52 +00:00
deploymentWatcher * deploymentwatcher . Watcher
2018-02-27 00:28:10 +00:00
// nodeDrainer is used to drain allocations from nodes.
2018-03-08 23:08:23 +00:00
nodeDrainer * drainer . NodeDrainer
2018-02-27 00:28:10 +00:00
2020-04-30 13:13:00 +00:00
// volumeWatcher is used to release volume claims
volumeWatcher * volumewatcher . Watcher
2015-07-24 04:44:17 +00:00
// evalBroker is used to manage the in-progress evaluations
// that are waiting to be brokered to a sub-scheduler
evalBroker * EvalBroker
2017-06-28 22:35:52 +00:00
// periodicDispatcher is used to track and create evaluations for periodic jobs.
periodicDispatcher * PeriodicDispatch
2016-01-29 23:31:32 +00:00
2018-09-15 23:23:13 +00:00
// planner is used to mange the submitted allocation plans that are waiting
// to be accessed by the leader
* planner
2015-07-27 22:11:42 +00:00
2018-09-15 23:23:13 +00:00
// nodeHeartbeater is used to track expiration times of node heartbeats. If it
// detects an expired node, the node status is updated to be 'down'.
* nodeHeartbeater
2015-08-23 00:17:13 +00:00
2017-02-01 00:43:57 +00:00
// consulCatalog is used for discovering other Nomad Servers via Consul
consulCatalog consul . CatalogAPI
2016-06-14 05:58:39 +00:00
2020-07-28 20:12:08 +00:00
// consulConfigEntries is used for managing Consul Configuration Entries.
consulConfigEntries ConsulConfigsAPI
2019-12-06 20:46:46 +00:00
// consulACLs is used for managing Consul Service Identity tokens.
consulACLs ConsulACLsAPI
2016-08-10 03:15:13 +00:00
// vault is the client for communicating with Vault.
2016-08-10 20:20:13 +00:00
vault VaultClient
2016-08-10 03:15:13 +00:00
2015-08-23 17:53:53 +00:00
// Worker used for processing
workers [ ] * Worker
2017-08-19 23:49:53 +00:00
// aclCache is used to maintain the parsed ACL objects
aclCache * lru . TwoQueueCache
2017-10-23 19:50:37 +00:00
// leaderAcl is the management ACL token that is valid when resolved by the
// current leader.
leaderAcl string
leaderAclLock sync . Mutex
2019-11-14 13:18:29 +00:00
// clusterIDLock ensures the server does not try to concurrently establish
// a cluster ID, racing against itself in calls of ClusterID
clusterIDLock sync . Mutex
2017-12-18 21:16:23 +00:00
// statsFetcher is used by autopilot to check the status of the other
// Nomad router.
statsFetcher * StatsFetcher
2017-09-19 14:47:10 +00:00
// EnterpriseState is used to fill in state for Pro/Ent builds
EnterpriseState
2015-06-03 11:25:50 +00:00
left bool
2015-06-01 15:49:10 +00:00
shutdown bool
shutdownLock sync . Mutex
2019-03-12 21:25:14 +00:00
shutdownCtx context . Context
shutdownCancel context . CancelFunc
shutdownCh <- chan struct { }
2015-06-01 15:49:10 +00:00
}
2015-06-03 10:26:50 +00:00
// Holds the RPC endpoints
type endpoints struct {
2017-08-10 19:24:11 +00:00
Status * Status
Node * Node
Job * Job
Eval * Eval
Plan * Plan
Alloc * Alloc
2019-11-06 21:51:16 +00:00
CSIVolume * CSIVolume
2020-01-28 15:28:34 +00:00
CSIPlugin * CSIPlugin
2017-08-10 19:24:11 +00:00
Deployment * Deployment
Region * Region
Search * Search
Periodic * Periodic
System * System
Operator * Operator
2017-08-08 00:10:04 +00:00
ACL * ACL
2020-01-16 20:12:52 +00:00
Scaling * Scaling
2017-09-07 23:56:15 +00:00
Enterprise * EnterpriseEndpoints
2020-09-28 14:13:10 +00:00
Event * Event
2018-01-11 23:58:59 +00:00
// Client endpoints
2020-04-02 20:04:56 +00:00
ClientStats * ClientStats
FileSystem * FileSystem
Agent * Agent
ClientAllocations * ClientAllocations
ClientCSI * ClientCSI
2015-06-03 10:26:50 +00:00
}
2015-06-01 15:49:10 +00:00
// NewServer is used to construct a new Nomad server from the
// configuration, potentially returning an error
2020-07-28 20:12:08 +00:00
func NewServer ( config * Config , consulCatalog consul . CatalogAPI , consulConfigEntries consul . ConfigAPI , consulACLs consul . ACLsAPI ) ( * Server , error ) {
2015-06-01 15:49:10 +00:00
// Check the protocol version
if err := config . CheckVersion ( ) ; err != nil {
return nil , err
}
2015-07-24 04:44:17 +00:00
// Create an eval broker
2017-04-14 22:24:55 +00:00
evalBroker , err := NewEvalBroker (
config . EvalNackTimeout ,
config . EvalNackInitialReenqueueDelay ,
config . EvalNackSubsequentReenqueueDelay ,
config . EvalDeliveryLimit )
2015-07-24 04:44:17 +00:00
if err != nil {
return nil , err
}
2016-10-24 05:22:00 +00:00
// Configure TLS
2018-05-23 21:25:30 +00:00
tlsConf , err := tlsutil . NewTLSConfiguration ( config . TLSConfig , true , true )
if err != nil {
return nil , err
}
2020-01-28 17:58:38 +00:00
incomingTLS , tlsWrap , err := getTLSConf ( config . TLSConfig . EnableRPC , tlsConf , config . Region )
2017-11-29 17:54:05 +00:00
if err != nil {
return nil , err
2016-10-24 05:22:00 +00:00
}
2017-08-19 23:49:53 +00:00
// Create the ACL object cache
aclCache , err := lru . New2Q ( aclCacheSize )
if err != nil {
return nil , err
}
2018-09-17 21:22:40 +00:00
// Create the logger
2019-10-15 19:14:25 +00:00
logger := config . Logger . ResetNamedIntercept ( "nomad" )
2018-09-17 21:22:40 +00:00
2015-06-01 15:49:10 +00:00
// Create the server
s := & Server {
2020-06-04 22:11:17 +00:00
config : config ,
consulCatalog : consulCatalog ,
connPool : pool . NewPool ( logger , serverRPCCache , serverMaxStreams , tlsWrap ) ,
logger : logger ,
tlsWrap : tlsWrap ,
rpcServer : rpc . NewServer ( ) ,
streamingRpcs : structs . NewStreamingRpcRegistry ( ) ,
nodeConns : make ( map [ string ] [ ] * nodeConnState ) ,
peers : make ( map [ string ] [ ] * serverParts ) ,
localPeers : make ( map [ raft . ServerAddress ] * serverParts ) ,
reassertLeaderCh : make ( chan chan error ) ,
reconcileCh : make ( chan serf . Member , 32 ) ,
eventCh : make ( chan serf . Event , 256 ) ,
evalBroker : evalBroker ,
blockedEvals : NewBlockedEvals ( evalBroker , logger ) ,
rpcTLS : incomingTLS ,
aclCache : aclCache ,
2015-06-01 15:49:10 +00:00
}
2019-03-12 21:25:14 +00:00
s . shutdownCtx , s . shutdownCancel = context . WithCancel ( context . Background ( ) )
s . shutdownCh = s . shutdownCtx . Done ( )
2018-09-15 23:23:13 +00:00
// Create the RPC handler
s . rpcHandler = newRpcHandler ( s )
// Create the planner
planner , err := newPlanner ( s )
if err != nil {
return nil , err
}
s . planner = planner
// Create the node heartbeater
s . nodeHeartbeater = newNodeHeartbeater ( s )
2015-12-18 20:26:28 +00:00
// Create the periodic dispatcher for launching periodic jobs.
2015-12-19 01:26:05 +00:00
s . periodicDispatcher = NewPeriodicDispatch ( s . logger , s )
2015-12-18 20:26:28 +00:00
2017-12-18 21:16:23 +00:00
// Initialize the stats fetcher that autopilot will use.
2018-09-15 23:23:13 +00:00
s . statsFetcher = NewStatsFetcher ( s . logger , s . connPool , s . config . Region )
2017-12-18 21:16:23 +00:00
2019-12-06 20:46:46 +00:00
// Setup Consul (more)
2020-07-28 20:12:08 +00:00
s . setupConsul ( consulConfigEntries , consulACLs )
2019-12-06 20:46:46 +00:00
2016-08-10 20:20:13 +00:00
// Setup Vault
if err := s . setupVaultClient ( ) ; err != nil {
s . Shutdown ( )
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to setup Vault client" , "error" , err )
2016-08-10 20:20:13 +00:00
return nil , fmt . Errorf ( "Failed to setup Vault client: %v" , err )
}
2015-06-03 10:26:50 +00:00
// Initialize the RPC layer
2016-10-24 05:22:00 +00:00
if err := s . setupRPC ( tlsWrap ) ; err != nil {
2015-06-03 10:26:50 +00:00
s . Shutdown ( )
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to start RPC layer" , "error" , err )
2015-06-03 10:26:50 +00:00
return nil , fmt . Errorf ( "Failed to start RPC layer: %v" , err )
}
2015-06-01 15:49:10 +00:00
// Initialize the Raft server
if err := s . setupRaft ( ) ; err != nil {
s . Shutdown ( )
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to start Raft" , "error" , err )
2015-06-01 15:49:10 +00:00
return nil , fmt . Errorf ( "Failed to start Raft: %v" , err )
}
2015-06-03 10:58:00 +00:00
// Initialize the wan Serf
s . serf , err = s . setupSerf ( config . SerfConfig , s . eventCh , serfSnapshot )
if err != nil {
s . Shutdown ( )
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to start serf WAN" , "error" , err )
2015-06-03 10:58:00 +00:00
return nil , fmt . Errorf ( "Failed to start serf: %v" , err )
}
2016-05-15 16:41:34 +00:00
// Initialize the scheduling workers
2015-07-28 22:12:08 +00:00
if err := s . setupWorkers ( ) ; err != nil {
s . Shutdown ( )
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to start workers" , "error" , err )
2015-07-28 22:12:08 +00:00
return nil , fmt . Errorf ( "Failed to start workers: %v" , err )
}
2016-06-14 05:58:39 +00:00
// Setup the Consul syncer
if err := s . setupConsulSyncer ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to create server consul syncer" , "error" , err )
2018-09-15 23:42:38 +00:00
return nil , fmt . Errorf ( "failed to create server Consul syncer: %v" , err )
2016-06-14 05:58:39 +00:00
}
2017-06-28 22:35:52 +00:00
// Setup the deployment watcher.
if err := s . setupDeploymentWatcher ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to create deployment watcher" , "error" , err )
2017-06-28 22:35:52 +00:00
return nil , fmt . Errorf ( "failed to create deployment watcher: %v" , err )
}
2020-04-30 13:13:00 +00:00
// Setup the volume watcher
if err := s . setupVolumeWatcher ( ) ; err != nil {
s . logger . Error ( "failed to create volume watcher" , "error" , err )
return nil , fmt . Errorf ( "failed to create volume watcher: %v" , err )
}
2018-02-27 00:28:10 +00:00
// Setup the node drainer.
s . setupNodeDrainer ( )
2017-09-19 14:47:10 +00:00
// Setup the enterprise state
if err := s . setupEnterprise ( config ) ; err != nil {
return nil , err
}
2015-09-07 17:46:41 +00:00
// Monitor leadership changes
go s . monitorLeadership ( )
// Start ingesting events for Serf
go s . serfEventHandler ( )
2017-12-07 17:07:00 +00:00
// start the RPC listener for the server
2017-11-30 19:52:13 +00:00
s . startRPCListener ( )
2015-06-03 10:26:50 +00:00
2015-07-27 22:11:42 +00:00
// Emit metrics for the eval broker
2015-08-05 23:45:50 +00:00
go evalBroker . EmitStats ( time . Second , s . shutdownCh )
2015-07-24 05:17:37 +00:00
2015-07-27 22:11:42 +00:00
// Emit metrics for the plan queue
2018-09-15 23:23:13 +00:00
go s . planQueue . EmitStats ( time . Second , s . shutdownCh )
2015-07-27 22:11:42 +00:00
2016-02-01 02:46:45 +00:00
// Emit metrics for the blocked eval tracker.
2018-11-07 18:08:23 +00:00
go s . blockedEvals . EmitStats ( time . Second , s . shutdownCh )
2016-02-01 02:46:45 +00:00
2017-02-15 00:02:18 +00:00
// Emit metrics for the Vault client.
go s . vault . EmitStats ( time . Second , s . shutdownCh )
2015-08-23 00:17:13 +00:00
// Emit metrics
go s . heartbeatStats ( )
2019-06-14 21:30:27 +00:00
// Emit raft and state store metrics
2019-06-19 16:58:46 +00:00
go s . EmitRaftStats ( 10 * time . Second , s . shutdownCh )
2019-06-14 21:30:27 +00:00
2017-09-19 14:47:10 +00:00
// Start enterprise background workers
s . startEnterpriseBackground ( )
2015-06-01 15:49:10 +00:00
// Done
return s , nil
}
2018-03-30 22:49:56 +00:00
// startRPCListener starts the server's the RPC listener
2017-11-30 19:52:13 +00:00
func ( s * Server ) startRPCListener ( ) {
ctx , cancel := context . WithCancel ( context . Background ( ) )
s . rpcCancel = cancel
2018-03-30 22:49:56 +00:00
go s . listen ( ctx )
2017-11-30 19:52:13 +00:00
}
2017-12-07 17:07:00 +00:00
// createRPCListener creates the server's RPC listener
2018-01-16 16:55:11 +00:00
func ( s * Server ) createRPCListener ( ) ( * net . TCPListener , error ) {
2017-11-30 19:52:13 +00:00
s . listenerCh = make ( chan struct { } )
2018-01-16 16:55:11 +00:00
listener , err := net . ListenTCP ( "tcp" , s . config . RPCAddr )
if err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to initialize TLS listener" , "error" , err )
2018-01-16 16:55:11 +00:00
return listener , err
2017-11-30 19:52:13 +00:00
}
2018-01-16 16:55:11 +00:00
s . rpcListener = listener
return listener , nil
2017-11-30 19:52:13 +00:00
}
2017-12-07 17:07:00 +00:00
// getTLSConf gets the server's TLS configuration based on the config supplied
// by the operator
2020-01-28 17:58:38 +00:00
func getTLSConf ( enableRPC bool , tlsConf * tlsutil . Config , region string ) ( * tls . Config , tlsutil . RegionWrapper , error ) {
2017-11-20 15:38:46 +00:00
var tlsWrap tlsutil . RegionWrapper
var incomingTLS * tls . Config
2020-01-28 17:58:38 +00:00
if ! enableRPC {
return incomingTLS , tlsWrap , nil
}
2017-11-20 15:38:46 +00:00
2020-01-28 17:58:38 +00:00
tlsWrap , err := tlsConf . OutgoingTLSWrapper ( )
if err != nil {
return nil , nil , err
}
itls , err := tlsConf . IncomingTLSConfig ( )
if err != nil {
return nil , nil , err
}
if tlsConf . VerifyServerHostname {
incomingTLS = itls . Clone ( )
incomingTLS . VerifyPeerCertificate = rpcNameAndRegionValidator ( region )
} else {
2017-11-20 15:38:46 +00:00
incomingTLS = itls
}
2017-11-28 22:25:16 +00:00
return incomingTLS , tlsWrap , nil
}
2020-01-28 17:58:38 +00:00
// rpcNameAndRegionValidator returns a function matching the signature of
// tls.Config.VerifyPeerCertificate, which is called after the certs have
// been verified. The raw certs are ignored; only the first certificate of
// the first verified chain is checked. It is accepted when any of its DNS
// names, or failing that its subject common name, passes
// validateRPCRegionPeer for the given region.
func rpcNameAndRegionValidator(region string) func([][]byte, [][]*x509.Certificate) error {
	return func(_ [][]byte, chains [][]*x509.Certificate) error {
		errInvalid := errors.New("invalid role or region for certificate")

		if len(chains) == 0 || len(chains[0]) == 0 {
			return errInvalid
		}

		leaf := chains[0][0]
		for _, san := range leaf.DNSNames {
			if validateRPCRegionPeer(san, region) {
				return nil
			}
		}

		if validateRPCRegionPeer(leaf.Subject.CommonName, region) {
			return nil
		}
		return errInvalid
	}
}
// validateRPCRegionPeer reports whether a certificate name identifies a peer
// that may use this server's RPC service. The name must consist of at least
// three dot-separated labels and end in "nomad". A name starting with
// "client" must be exactly "client.<region>.nomad"; a name starting with
// "server" is accepted for any region.
func validateRPCRegionPeer(name, region string) bool {
	labels := strings.Split(name, ".")
	if n := len(labels); n < 3 || labels[n-1] != "nomad" {
		// Invalid SAN or incorrect service
		return false
	}

	switch labels[0] {
	case "client":
		// Clients may only connect to servers in their region
		return name == "client."+region+".nomad"
	case "server":
		// Servers may connect to any Nomad RPC service for federation.
		return true
	default:
		return false
	}
}
2017-12-07 17:07:00 +00:00
// reloadTLSConnections updates a server's TLS configuration and reloads RPC
// connections.
2017-12-05 00:29:43 +00:00
func ( s * Server ) reloadTLSConnections ( newTLSConfig * config . TLSConfig ) error {
2018-09-15 23:23:13 +00:00
s . logger . Info ( "reloading server connections due to configuration changes" )
2017-11-28 22:25:16 +00:00
2018-03-30 22:49:56 +00:00
// Check if we can reload the RPC listener
if s . rpcListener == nil || s . rpcCancel == nil {
2018-09-15 23:23:13 +00:00
s . logger . Warn ( "unable to reload configuration due to uninitialized rpc listner" )
2018-03-30 22:49:56 +00:00
return fmt . Errorf ( "can't reload uninitialized RPC listener" )
}
2018-05-23 21:25:30 +00:00
tlsConf , err := tlsutil . NewTLSConfiguration ( newTLSConfig , true , true )
2018-05-08 20:32:07 +00:00
if err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "unable to create TLS configuration" , "error" , err )
2018-05-08 20:32:07 +00:00
return err
}
2020-01-28 17:58:38 +00:00
incomingTLS , tlsWrap , err := getTLSConf ( newTLSConfig . EnableRPC , tlsConf , s . config . Region )
2017-11-29 17:54:05 +00:00
if err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "unable to reset TLS context" , "error" , err )
2017-11-29 17:54:05 +00:00
return err
}
2017-11-20 15:38:46 +00:00
2018-02-15 23:03:12 +00:00
// Store the new tls wrapper.
s . tlsWrapLock . Lock ( )
s . tlsWrap = tlsWrap
s . tlsWrapLock . Unlock ( )
2018-01-16 16:55:11 +00:00
// Keeping configuration in sync is important for other places that require
// access to config information, such as rpc.go, where we decide on what kind
// of network connections to accept depending on the server configuration
2017-11-29 22:22:21 +00:00
s . config . TLSConfig = newTLSConfig
2018-03-30 22:49:56 +00:00
// Kill any old listeners
s . rpcCancel ( )
2017-11-20 15:38:46 +00:00
s . rpcTLS = incomingTLS
s . connPool . ReloadTLS ( tlsWrap )
2018-03-30 18:05:00 +00:00
if err := s . rpcListener . Close ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "unable to close rpc listener" , "error" , err )
2018-03-30 18:05:00 +00:00
return err
}
2018-03-30 22:49:56 +00:00
// Wait for the old listener to exit
2017-11-30 19:52:13 +00:00
<- s . listenerCh
2017-11-28 17:33:46 +00:00
2018-03-30 22:49:56 +00:00
// Create the new listener with the update TLS config
2018-01-16 16:55:11 +00:00
listener , err := s . createRPCListener ( )
if err != nil {
listener . Close ( )
return err
}
2017-11-20 15:38:46 +00:00
2018-03-30 22:49:56 +00:00
// Start the new RPC listener
2018-03-30 21:15:56 +00:00
s . startRPCListener ( )
2018-01-16 19:16:35 +00:00
// Close and reload existing Raft connections
2017-11-27 16:42:52 +00:00
wrapper := tlsutil . RegionSpecificWrapper ( s . config . Region , tlsWrap )
2018-01-19 10:12:14 +00:00
s . raftLayer . ReloadTLS ( wrapper )
s . raftTransport . CloseStreams ( )
2017-11-27 16:42:52 +00:00
2018-09-15 23:23:13 +00:00
s . logger . Debug ( "finished reloading server connections" )
2017-11-20 15:38:46 +00:00
return nil
}
2015-06-01 15:49:10 +00:00
// Shutdown is used to shutdown the server
func ( s * Server ) Shutdown ( ) error {
2018-09-15 23:23:13 +00:00
s . logger . Info ( "shutting down server" )
2015-06-01 15:49:10 +00:00
s . shutdownLock . Lock ( )
defer s . shutdownLock . Unlock ( )
if s . shutdown {
return nil
}
s . shutdown = true
2019-03-12 21:25:14 +00:00
s . shutdownCancel ( )
2015-06-01 15:49:10 +00:00
2015-06-05 21:54:45 +00:00
if s . serf != nil {
s . serf . Shutdown ( )
}
2015-06-01 15:49:10 +00:00
if s . raft != nil {
s . raftTransport . Close ( )
s . raftLayer . Close ( )
future := s . raft . Shutdown ( )
if err := future . Error ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Warn ( "error shutting down raft" , "error" , err )
2015-06-01 15:49:10 +00:00
}
2015-06-03 09:26:49 +00:00
if s . raftStore != nil {
s . raftStore . Close ( )
}
2015-06-01 15:49:10 +00:00
}
2015-06-03 10:26:50 +00:00
// Shutdown the RPC listener
if s . rpcListener != nil {
s . rpcListener . Close ( )
}
2015-06-07 18:50:29 +00:00
// Close the connection pool
s . connPool . Shutdown ( )
2015-06-01 15:49:10 +00:00
// Close the fsm
if s . fsm != nil {
s . fsm . Close ( )
}
2016-08-10 20:20:13 +00:00
2020-01-02 15:03:05 +00:00
// Stop Vault token renewal and revocations
2016-08-11 20:04:56 +00:00
if s . vault != nil {
s . vault . Stop ( )
}
2016-08-10 20:20:13 +00:00
2020-01-02 15:03:05 +00:00
// Stop the Consul ACLs token revocations
s . consulACLs . Stop ( )
2020-07-28 20:12:08 +00:00
// Stop being able to set Configuration Entries
s . consulConfigEntries . Stop ( )
2015-06-01 15:49:10 +00:00
return nil
}
2015-07-28 22:12:08 +00:00
// IsShutdown reports whether the server has been shut down, i.e. whether
// shutdownCh is ready to receive from. It never blocks.
func (s *Server) IsShutdown() bool {
	select {
	case <-s.shutdownCh:
	default:
		return false
	}
	return true
}
2015-06-03 11:25:50 +00:00
// Leave is used to prepare for a graceful shutdown of the server
func ( s * Server ) Leave ( ) error {
2018-09-15 23:23:13 +00:00
s . logger . Info ( "server starting leave" )
2015-06-03 11:25:50 +00:00
s . left = true
// Check the number of known peers
2017-02-02 21:52:31 +00:00
numPeers , err := s . numPeers ( )
2015-06-03 11:25:50 +00:00
if err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to check raft peers during leave" , "error" , err )
2015-06-03 11:25:50 +00:00
return err
}
2017-02-03 00:07:15 +00:00
addr := s . raftTransport . LocalAddr ( )
2015-06-03 11:25:50 +00:00
// If we are the current leader, and we have any other peers (cluster has multiple
// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
// not the leader, then we should issue our leave intention and wait to be removed
// for some sane period of time.
isLeader := s . IsLeader ( )
2017-02-02 21:52:31 +00:00
if isLeader && numPeers > 1 {
2018-01-16 21:35:32 +00:00
minRaftProtocol , err := s . autopilot . MinRaftProtocol ( )
if err != nil {
return err
}
if minRaftProtocol >= 2 && s . config . RaftConfig . ProtocolVersion >= 3 {
future := s . raft . RemoveServer ( raft . ServerID ( s . config . NodeID ) , 0 , 0 )
if err := future . Error ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to remove ourself as raft peer" , "error" , err )
2018-01-16 21:35:32 +00:00
}
} else {
future := s . raft . RemovePeer ( addr )
if err := future . Error ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to remove ourself as raft peer" , "error" , err )
2018-01-16 21:35:32 +00:00
}
2015-06-03 11:25:50 +00:00
}
}
// Leave the gossip pool
if s . serf != nil {
if err := s . serf . Leave ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to leave Serf cluster" , "error" , err )
2015-06-03 11:25:50 +00:00
}
}
// If we were not leader, wait to be safely removed from the cluster.
// We must wait to allow the raft replication to take place, otherwise
// an immediate shutdown could cause a loss of quorum.
if ! isLeader {
2017-02-03 00:07:15 +00:00
left := false
2015-06-03 11:25:50 +00:00
limit := time . Now ( ) . Add ( raftRemoveGracePeriod )
2017-02-03 00:07:15 +00:00
for ! left && time . Now ( ) . Before ( limit ) {
// Sleep a while before we check.
time . Sleep ( 50 * time . Millisecond )
2015-06-03 11:25:50 +00:00
2017-02-03 00:07:15 +00:00
// Get the latest configuration.
future := s . raft . GetConfiguration ( )
if err := future . Error ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to get raft configuration" , "error" , err )
2015-06-03 11:25:50 +00:00
break
}
2017-02-03 00:07:15 +00:00
// See if we are no longer included.
left = true
for _ , server := range future . Configuration ( ) . Servers {
if server . Address == addr {
left = false
break
}
}
2015-06-03 11:25:50 +00:00
}
2017-02-03 00:07:15 +00:00
// TODO (alexdadgar) With the old Raft library we used to force the
// peers set to empty when a graceful leave occurred. This would
// keep voting spam down if the server was restarted, but it was
// dangerous because the peers was inconsistent with the logs and
// snapshots, so it wasn't really safe in all cases for the server
// to become leader. This is now safe, but the log spam is noisy.
// The next new version of the library will have a "you are not a
// peer stop it" behavior that should address this. We will have
// to evaluate during the RC period if this interim situation is
// not too confusing for operators.
// TODO (alexdadgar) When we take a later new version of the Raft
// library it won't try to complete replication, so this peer
// may not realize that it has been removed. Need to revisit this
// and the warning here.
if ! left {
2018-09-15 23:23:13 +00:00
s . logger . Warn ( "failed to leave raft configuration gracefully, timeout" )
2015-06-03 11:25:50 +00:00
}
}
return nil
}
2017-11-20 15:38:46 +00:00
// Reload handles a config reload specific to server-only configuration. Not
// all config fields can handle a reload.
func ( s * Server ) Reload ( newConfig * Config ) error {
if newConfig == nil {
2017-02-01 22:20:14 +00:00
return fmt . Errorf ( "Reload given a nil config" )
}
var mErr multierror . Error
// Handle the Vault reload. Vault should never be nil but just guard.
if s . vault != nil {
2017-11-20 15:38:46 +00:00
if err := s . vault . SetConfig ( newConfig . VaultConfig ) ; err != nil {
2017-02-01 22:20:14 +00:00
multierror . Append ( & mErr , err )
}
}
2018-06-08 18:33:58 +00:00
shouldReloadTLS , err := tlsutil . ShouldReloadRPCConnections ( s . config . TLSConfig , newConfig . TLSConfig )
2018-03-28 21:54:22 +00:00
if err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "error checking whether to reload TLS configuration" , "error" , err )
2018-03-28 21:54:22 +00:00
}
2018-06-08 18:33:58 +00:00
if shouldReloadTLS {
2017-12-05 00:29:43 +00:00
if err := s . reloadTLSConnections ( newConfig . TLSConfig ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "error reloading server TLS configuration" , "error" , err )
2017-02-01 22:20:14 +00:00
multierror . Append ( & mErr , err )
}
}
return mErr . ErrorOrNil ( )
}
2016-06-16 19:14:03 +00:00
// setupBootstrapHandler() creates the closure necessary to support a Consul
// fallback handler.
func ( s * Server ) setupBootstrapHandler ( ) error {
// peersTimeout is used to indicate to the Consul Syncer that the
// current Nomad Server has a stale peer set. peersTimeout will time
// out if the Consul Syncer bootstrapFn has not observed a Raft
// leader in maxStaleLeadership. If peersTimeout has been triggered,
// the Consul Syncer will begin querying Consul for other Nomad
// Servers.
//
// NOTE: time.Timer is used vs time.Time in order to handle clock
// drift because time.Timer is implemented as a monotonic clock.
var peersTimeout * time . Timer = time . NewTimer ( 0 )
2016-06-16 21:27:10 +00:00
// consulQueryCount is the number of times the bootstrapFn has been
// called, regardless of success.
var consulQueryCount uint64
2016-06-16 19:14:03 +00:00
// leadershipTimedOut is a helper method that returns true if the
// peersTimeout timer has expired.
leadershipTimedOut := func ( ) bool {
select {
case <- peersTimeout . C :
return true
default :
return false
}
}
2016-06-14 05:58:39 +00:00
// The bootstrapFn callback handler is used to periodically poll
// Consul to look up the Nomad Servers in Consul. In the event the
// server has been brought up without a `retry-join` configuration
// and this Server is partitioned from the rest of the cluster,
// periodically poll Consul to reattach this Server to other servers
// in the same region and automatically reform a quorum (assuming the
// correct number of servers required for quorum are present).
bootstrapFn := func ( ) error {
2016-06-14 07:07:04 +00:00
// If there is a raft leader, do nothing
if s . raft . Leader ( ) != "" {
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( maxStaleLeadership )
2016-06-14 07:07:04 +00:00
return nil
}
2016-06-16 19:14:03 +00:00
// (ab)use serf.go's behavior of setting BootstrapExpect to
// zero if we have bootstrapped. If we have bootstrapped
2020-03-02 15:29:24 +00:00
bootstrapExpect := s . config . BootstrapExpect
2016-06-16 19:14:03 +00:00
if bootstrapExpect == 0 {
// This Nomad Server has been bootstrapped. Rely on
2016-06-16 21:27:10 +00:00
// the peersTimeout firing as a guard to prevent
// aggressive querying of Consul.
2016-06-16 19:14:03 +00:00
if ! leadershipTimedOut ( ) {
return nil
}
} else {
2016-06-16 21:27:10 +00:00
if consulQueryCount > 0 && ! leadershipTimedOut ( ) {
return nil
}
2016-06-16 19:14:03 +00:00
// This Nomad Server has not been bootstrapped, reach
// out to Consul if our peer list is less than
// `bootstrap_expect`.
2017-02-02 21:52:31 +00:00
raftPeers , err := s . numPeers ( )
2016-06-16 19:14:03 +00:00
if err != nil {
peersTimeout . Reset ( peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor ) )
return nil
}
// The necessary number of Nomad Servers required for
// quorum has been reached, we do not need to poll
// Consul. Let the normal timeout-based strategy
// take over.
2020-03-02 15:29:24 +00:00
if raftPeers >= bootstrapExpect {
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor ) )
return nil
}
2016-06-14 05:58:39 +00:00
}
2016-06-16 21:27:10 +00:00
consulQueryCount ++
2016-06-14 05:58:39 +00:00
2018-09-15 23:23:13 +00:00
s . logger . Debug ( "lost contact with Nomad quorum, falling back to Consul for server list" )
2016-06-14 05:58:39 +00:00
2017-02-01 00:43:57 +00:00
dcs , err := s . consulCatalog . Datacenters ( )
2016-06-14 05:58:39 +00:00
if err != nil {
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor ) )
2017-02-03 00:07:15 +00:00
return fmt . Errorf ( "server.nomad: unable to query Consul datacenters: %v" , err )
2016-06-14 05:58:39 +00:00
}
if len ( dcs ) > 2 {
// Query the local DC first, then shuffle the
// remaining DCs. If additional calls to bootstrapFn
// are necessary, this Nomad Server will eventually
// walk all datacenter until it finds enough hosts to
// form a quorum.
2016-06-21 21:26:01 +00:00
shuffleStrings ( dcs [ 1 : ] )
dcs = dcs [ 0 : lib . MinInt ( len ( dcs ) , datacenterQueryLimit ) ]
2016-06-14 05:58:39 +00:00
}
nomadServerServiceName := s . config . ConsulConfig . ServerServiceName
var mErr multierror . Error
const defaultMaxNumNomadServers = 8
nomadServerServices := make ( [ ] string , 0 , defaultMaxNumNomadServers )
2016-06-16 21:27:10 +00:00
localNode := s . serf . Memberlist ( ) . LocalNode ( )
2016-06-14 05:58:39 +00:00
for _ , dc := range dcs {
2016-06-16 19:14:03 +00:00
consulOpts := & consulapi . QueryOptions {
2016-06-14 05:58:39 +00:00
AllowStale : true ,
Datacenter : dc ,
Near : "_agent" ,
WaitTime : consul . DefaultQueryWaitDuration ,
}
2017-02-01 00:43:57 +00:00
consulServices , _ , err := s . consulCatalog . Service ( nomadServerServiceName , consul . ServiceTagSerf , consulOpts )
2016-06-14 05:58:39 +00:00
if err != nil {
2016-06-16 21:40:09 +00:00
err := fmt . Errorf ( "failed to query service %q in Consul datacenter %q: %v" , nomadServerServiceName , dc , err )
2018-09-15 23:23:13 +00:00
s . logger . Warn ( "failed to query Nomad service in Consul datacenter" , "service_name" , nomadServerServiceName , "dc" , dc , "error" , err )
2016-06-16 21:40:09 +00:00
mErr . Errors = append ( mErr . Errors , err )
2016-06-14 05:58:39 +00:00
continue
}
for _ , cs := range consulServices {
port := strconv . FormatInt ( int64 ( cs . ServicePort ) , 10 )
addr := cs . ServiceAddress
if addr == "" {
addr = cs . Address
}
2016-06-16 21:27:10 +00:00
if localNode . Addr . String ( ) == addr && int ( localNode . Port ) == cs . ServicePort {
continue
}
2016-06-14 05:58:39 +00:00
serverAddr := net . JoinHostPort ( addr , port )
nomadServerServices = append ( nomadServerServices , serverAddr )
}
}
2016-06-14 07:07:04 +00:00
2016-06-14 05:58:39 +00:00
if len ( nomadServerServices ) == 0 {
if len ( mErr . Errors ) > 0 {
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor ) )
2016-06-14 05:58:39 +00:00
return mErr . ErrorOrNil ( )
}
2016-06-14 07:07:04 +00:00
// Log the error and return nil so future handlers
// can attempt to register the `nomad` service.
2016-06-16 21:27:10 +00:00
pollInterval := peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor )
2018-09-15 23:23:13 +00:00
s . logger . Trace ( "no Nomad Servers advertising Nomad service in Consul datacenters" , "service_name" , nomadServerServiceName , "datacenters" , dcs , "retry" , pollInterval )
2016-06-16 21:27:10 +00:00
peersTimeout . Reset ( pollInterval )
2016-06-14 07:07:04 +00:00
return nil
2016-06-14 05:58:39 +00:00
}
2016-06-14 07:07:04 +00:00
2016-06-14 05:58:39 +00:00
numServersContacted , err := s . Join ( nomadServerServices )
if err != nil {
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( peersPollInterval + lib . RandomStagger ( peersPollInterval / peersPollJitterFactor ) )
2016-06-14 05:58:39 +00:00
return fmt . Errorf ( "contacted %d Nomad Servers: %v" , numServersContacted , err )
}
2016-06-16 19:14:03 +00:00
peersTimeout . Reset ( maxStaleLeadership )
2018-09-15 23:23:13 +00:00
s . logger . Info ( "successfully contacted Nomad servers" , "num_servers" , numServersContacted )
2016-06-14 05:58:39 +00:00
return nil
}
2016-06-16 19:14:03 +00:00
2017-02-01 00:43:57 +00:00
// Hacky replacement for old ConsulSyncer Periodic Handler.
go func ( ) {
lastOk := true
sync := time . NewTimer ( 0 )
for {
select {
case <- sync . C :
d := defaultConsulDiscoveryInterval
if err := bootstrapFn ( ) ; err != nil {
// Only log if it worked last time
if lastOk {
lastOk = false
2018-09-15 23:23:13 +00:00
s . logger . Error ( "error looking up Nomad servers in Consul" , "error" , err )
2017-02-01 00:43:57 +00:00
}
d = defaultConsulDiscoveryIntervalRetry
}
sync . Reset ( d )
case <- s . shutdownCh :
return
}
}
} ( )
2016-06-16 19:14:03 +00:00
return nil
}
// setupConsulSyncer creates Server-mode consul.Syncer which periodically
// executes callbacks on a fixed interval.
func ( s * Server ) setupConsulSyncer ( ) error {
2017-01-18 23:55:14 +00:00
if s . config . ConsulConfig . ServerAutoJoin != nil && * s . config . ConsulConfig . ServerAutoJoin {
2016-06-16 19:14:03 +00:00
if err := s . setupBootstrapHandler ( ) ; err != nil {
2016-06-16 21:40:09 +00:00
return err
2016-06-16 19:14:03 +00:00
}
2016-06-14 22:27:15 +00:00
}
2016-06-14 05:58:39 +00:00
2016-06-16 21:40:09 +00:00
return nil
2016-08-10 20:20:13 +00:00
}
2017-06-28 22:35:52 +00:00
// setupDeploymentWatcher creates a deployment watcher that consumes the RPC
2018-03-11 19:06:05 +00:00
// endpoints for state information and makes transitions via Raft through a
2017-06-28 22:35:52 +00:00
// shim that provides the appropriate methods.
func ( s * Server ) setupDeploymentWatcher ( ) error {
2017-08-31 00:45:32 +00:00
// Create the raft shim type to restrict the set of raft methods that can be
// made
2017-06-28 22:35:52 +00:00
raftShim := & deploymentWatcherRaftShim {
apply : s . raftApply ,
}
2017-07-03 18:26:45 +00:00
// Create the deployment watcher
s . deploymentWatcher = deploymentwatcher . NewDeploymentsWatcher (
2020-06-16 23:23:14 +00:00
s . logger ,
raftShim ,
s . staticEndpoints . Deployment ,
s . staticEndpoints . Job ,
2017-07-03 18:26:45 +00:00
deploymentwatcher . LimitStateQueriesPerSecond ,
2020-06-16 23:23:14 +00:00
deploymentwatcher . CrossDeploymentUpdateBatchDuration ,
)
2017-07-03 18:26:45 +00:00
2017-06-28 22:35:52 +00:00
return nil
}
2020-08-06 18:31:18 +00:00
// setupVolumeWatcher creates a volume watcher that sends CSI RPCs
2020-04-30 13:13:00 +00:00
func ( s * Server ) setupVolumeWatcher ( ) error {
s . volumeWatcher = volumewatcher . NewVolumesWatcher (
2020-08-07 19:37:27 +00:00
s . logger , s . staticEndpoints . CSIVolume , s . getLeaderAcl ( ) )
2020-04-30 13:13:00 +00:00
return nil
}
2018-02-27 00:28:10 +00:00
// setupNodeDrainer creates a node drainer which will be enabled when a server
// becomes a leader.
func ( s * Server ) setupNodeDrainer ( ) {
2018-03-06 22:37:37 +00:00
// Create a shim around Raft requests
2018-02-27 00:28:10 +00:00
shim := drainerShim { s }
2018-03-08 23:08:23 +00:00
c := & drainer . NodeDrainerConfig {
2018-03-06 22:37:37 +00:00
Logger : s . logger ,
Raft : shim ,
2018-03-08 23:08:23 +00:00
JobFactory : drainer . GetDrainingJobWatcher ,
NodeFactory : drainer . GetNodeWatcherFactory ( ) ,
DrainDeadlineFactory : drainer . GetDeadlineNotifier ,
StateQueriesPerSecond : drainer . LimitStateQueriesPerSecond ,
BatchUpdateInterval : drainer . BatchUpdateInterval ,
2018-03-06 22:37:37 +00:00
}
2018-03-08 23:08:23 +00:00
s . nodeDrainer = drainer . NewNodeDrainer ( c )
2018-02-27 00:28:10 +00:00
}
2019-12-06 20:46:46 +00:00
// setupConsul is used to setup Server specific consul components.
2020-07-28 20:12:08 +00:00
func ( s * Server ) setupConsul ( consulConfigEntries consul . ConfigAPI , consulACLs consul . ACLsAPI ) {
s . consulConfigEntries = NewConsulConfigsAPI ( consulConfigEntries , s . logger )
2020-01-02 15:03:05 +00:00
s . consulACLs = NewConsulACLsAPI ( consulACLs , s . logger , s . purgeSITokenAccessors )
2019-12-06 20:46:46 +00:00
}
2016-08-10 20:20:13 +00:00
// setupVaultClient is used to set up the Vault API client.
func ( s * Server ) setupVaultClient ( ) error {
2020-07-17 14:41:45 +00:00
delegate := s . entVaultDelegate ( )
v , err := NewVaultClient ( s . config . VaultConfig , s . logger , s . purgeVaultAccessors , delegate )
2016-08-10 20:20:13 +00:00
if err != nil {
return err
}
s . vault = v
return nil
2016-06-14 05:58:39 +00:00
}
2015-06-03 10:26:50 +00:00
// setupRPC is used to setup the RPC listener
2016-11-01 18:55:29 +00:00
func ( s * Server ) setupRPC ( tlsWrap tlsutil . RegionWrapper ) error {
2018-01-03 22:59:52 +00:00
// Populate the static RPC server
2018-01-04 00:00:55 +00:00
s . setupRpcServer ( s . rpcServer , nil )
2015-06-03 10:26:50 +00:00
2018-01-16 16:55:11 +00:00
listener , err := s . createRPCListener ( )
2015-06-03 10:26:50 +00:00
if err != nil {
2018-01-16 16:55:11 +00:00
listener . Close ( )
2015-06-03 10:26:50 +00:00
return err
}
2018-03-16 22:24:49 +00:00
if s . config . ClientRPCAdvertise != nil {
s . clientRpcAdvertise = s . config . ClientRPCAdvertise
2015-06-03 10:26:50 +00:00
} else {
2018-03-16 22:24:49 +00:00
s . clientRpcAdvertise = s . rpcListener . Addr ( )
2015-06-03 10:26:50 +00:00
}
// Verify that we have a usable advertise address
2018-03-16 22:53:14 +00:00
clientAddr , ok := s . clientRpcAdvertise . ( * net . TCPAddr )
2015-06-03 10:26:50 +00:00
if ! ok {
2018-01-16 16:55:11 +00:00
listener . Close ( )
2018-03-16 22:53:14 +00:00
return fmt . Errorf ( "Client RPC advertise address is not a TCP Address: %v" , clientAddr )
2015-06-03 10:26:50 +00:00
}
2018-03-16 22:53:14 +00:00
if clientAddr . IP . IsUnspecified ( ) {
2018-01-16 16:55:11 +00:00
listener . Close ( )
2018-03-16 22:53:14 +00:00
return fmt . Errorf ( "Client RPC advertise address is not advertisable: %v" , clientAddr )
}
if s . config . ServerRPCAdvertise != nil {
s . serverRpcAdvertise = s . config . ServerRPCAdvertise
} else {
// Default to the Serf Advertise + RPC Port
serfIP := s . config . SerfConfig . MemberlistConfig . AdvertiseAddr
if serfIP == "" {
serfIP = s . config . SerfConfig . MemberlistConfig . BindAddr
}
addr := net . JoinHostPort ( serfIP , fmt . Sprintf ( "%d" , clientAddr . Port ) )
resolved , err := net . ResolveTCPAddr ( "tcp" , addr )
if err != nil {
return fmt . Errorf ( "Failed to resolve Server RPC advertise address: %v" , err )
}
s . serverRpcAdvertise = resolved
}
// Verify that we have a usable advertise address
serverAddr , ok := s . serverRpcAdvertise . ( * net . TCPAddr )
if ! ok {
return fmt . Errorf ( "Server RPC advertise address is not a TCP Address: %v" , serverAddr )
}
if serverAddr . IP . IsUnspecified ( ) {
listener . Close ( )
return fmt . Errorf ( "Server RPC advertise address is not advertisable: %v" , serverAddr )
2015-06-03 10:26:50 +00:00
}
2016-11-01 18:55:29 +00:00
wrapper := tlsutil . RegionSpecificWrapper ( s . config . Region , tlsWrap )
2018-03-16 22:24:49 +00:00
s . raftLayer = NewRaftLayer ( s . serverRpcAdvertise , wrapper )
2015-06-03 10:26:50 +00:00
return nil
}
2018-01-03 22:59:52 +00:00
// setupRpcServer is used to populate an RPC server with endpoints
2018-01-04 00:00:55 +00:00
func ( s * Server ) setupRpcServer ( server * rpc . Server , ctx * RPCContext ) {
2018-01-03 22:59:52 +00:00
// Add the static endpoints to the RPC server.
if s . staticEndpoints . Status == nil {
// Initialize the list just once
2018-09-15 23:23:13 +00:00
s . staticEndpoints . ACL = & ACL { srv : s , logger : s . logger . Named ( "acl" ) }
s . staticEndpoints . Alloc = & Alloc { srv : s , logger : s . logger . Named ( "alloc" ) }
s . staticEndpoints . Eval = & Eval { srv : s , logger : s . logger . Named ( "eval" ) }
2019-08-15 15:22:37 +00:00
s . staticEndpoints . Job = NewJobEndpoints ( s )
2018-09-15 23:23:13 +00:00
s . staticEndpoints . Node = & Node { srv : s , logger : s . logger . Named ( "client" ) } // Add but don't register
2019-11-06 21:51:16 +00:00
s . staticEndpoints . CSIVolume = & CSIVolume { srv : s , logger : s . logger . Named ( "csi_volume" ) }
2020-01-28 15:28:34 +00:00
s . staticEndpoints . CSIPlugin = & CSIPlugin { srv : s , logger : s . logger . Named ( "csi_plugin" ) }
2018-09-15 23:23:13 +00:00
s . staticEndpoints . Deployment = & Deployment { srv : s , logger : s . logger . Named ( "deployment" ) }
s . staticEndpoints . Operator = & Operator { srv : s , logger : s . logger . Named ( "operator" ) }
2020-05-21 14:05:04 +00:00
s . staticEndpoints . Operator . register ( )
2018-09-15 23:23:13 +00:00
s . staticEndpoints . Periodic = & Periodic { srv : s , logger : s . logger . Named ( "periodic" ) }
s . staticEndpoints . Plan = & Plan { srv : s , logger : s . logger . Named ( "plan" ) }
s . staticEndpoints . Region = & Region { srv : s , logger : s . logger . Named ( "region" ) }
2020-01-16 20:12:52 +00:00
s . staticEndpoints . Scaling = & Scaling { srv : s , logger : s . logger . Named ( "scaling" ) }
2018-09-15 23:23:13 +00:00
s . staticEndpoints . Status = & Status { srv : s , logger : s . logger . Named ( "status" ) }
s . staticEndpoints . System = & System { srv : s , logger : s . logger . Named ( "system" ) }
s . staticEndpoints . Search = & Search { srv : s , logger : s . logger . Named ( "search" ) }
2018-01-03 22:59:52 +00:00
s . staticEndpoints . Enterprise = NewEnterpriseEndpoints ( s )
2018-01-11 23:58:59 +00:00
// Client endpoints
2018-09-15 23:23:13 +00:00
s . staticEndpoints . ClientStats = & ClientStats { srv : s , logger : s . logger . Named ( "client_stats" ) }
s . staticEndpoints . ClientAllocations = & ClientAllocations { srv : s , logger : s . logger . Named ( "client_allocs" ) }
2019-04-28 21:25:27 +00:00
s . staticEndpoints . ClientAllocations . register ( )
2020-04-02 20:04:56 +00:00
s . staticEndpoints . ClientCSI = & ClientCSI { srv : s , logger : s . logger . Named ( "client_csi" ) }
2018-01-22 01:09:20 +00:00
// Streaming endpoints
2018-09-15 23:23:13 +00:00
s . staticEndpoints . FileSystem = & FileSystem { srv : s , logger : s . logger . Named ( "client_fs" ) }
2018-02-01 19:28:52 +00:00
s . staticEndpoints . FileSystem . register ( )
2019-10-15 19:14:25 +00:00
2019-10-30 13:28:24 +00:00
s . staticEndpoints . Agent = & Agent { srv : s }
s . staticEndpoints . Agent . register ( )
2020-09-28 14:13:10 +00:00
s . staticEndpoints . Event = & Event { srv : s }
s . staticEndpoints . Event . register ( )
2018-01-03 22:59:52 +00:00
}
// Register the static handlers
server . Register ( s . staticEndpoints . ACL )
server . Register ( s . staticEndpoints . Alloc )
server . Register ( s . staticEndpoints . Eval )
server . Register ( s . staticEndpoints . Job )
2019-11-06 21:51:16 +00:00
server . Register ( s . staticEndpoints . CSIVolume )
2020-01-28 15:28:34 +00:00
server . Register ( s . staticEndpoints . CSIPlugin )
2018-01-03 22:59:52 +00:00
server . Register ( s . staticEndpoints . Deployment )
server . Register ( s . staticEndpoints . Operator )
server . Register ( s . staticEndpoints . Periodic )
server . Register ( s . staticEndpoints . Plan )
server . Register ( s . staticEndpoints . Region )
2020-01-16 20:12:52 +00:00
server . Register ( s . staticEndpoints . Scaling )
2018-01-03 22:59:52 +00:00
server . Register ( s . staticEndpoints . Status )
server . Register ( s . staticEndpoints . System )
server . Register ( s . staticEndpoints . Search )
s . staticEndpoints . Enterprise . Register ( server )
2018-01-11 23:58:59 +00:00
server . Register ( s . staticEndpoints . ClientStats )
2018-02-06 01:20:42 +00:00
server . Register ( s . staticEndpoints . ClientAllocations )
2020-04-02 20:04:56 +00:00
server . Register ( s . staticEndpoints . ClientCSI )
2018-02-01 19:28:52 +00:00
server . Register ( s . staticEndpoints . FileSystem )
2019-12-09 15:55:43 +00:00
server . Register ( s . staticEndpoints . Agent )
2018-01-03 22:59:52 +00:00
// Create new dynamic endpoints and add them to the RPC server.
2018-09-15 23:23:13 +00:00
node := & Node { srv : s , ctx : ctx , logger : s . logger . Named ( "client" ) }
2018-01-05 21:50:04 +00:00
// Register the dynamic endpoints
server . Register ( node )
2018-01-03 22:59:52 +00:00
}
2015-06-01 15:49:10 +00:00
// setupRaft is used to setup and initialize Raft
func ( s * Server ) setupRaft ( ) error {
2017-02-02 23:31:36 +00:00
// If we have an unclean exit then attempt to close the Raft store.
defer func ( ) {
if s . raft == nil && s . raftStore != nil {
if err := s . raftStore . Close ( ) ; err != nil {
2018-09-15 23:23:13 +00:00
s . logger . Error ( "failed to close Raft store" , "error" , err )
2017-02-02 23:31:36 +00:00
}
}
} ( )
2015-06-01 15:49:10 +00:00
// Create the FSM
2017-10-13 21:36:02 +00:00
fsmConfig := & FSMConfig {
2020-10-08 18:27:52 +00:00
EvalBroker : s . evalBroker ,
Periodic : s . periodicDispatcher ,
Blocked : s . blockedEvals ,
Logger : s . logger ,
Region : s . Region ( ) ,
EnableEventBroker : s . config . EnableEventBroker ,
DurableEventCount : s . config . DurableEventCount ,
EventBufferSize : s . config . EventBufferSize ,
2017-10-13 21:36:02 +00:00
}
2015-06-01 15:49:10 +00:00
var err error
2017-10-13 21:36:02 +00:00
s . fsm , err = NewFSM ( fsmConfig )
2015-06-01 15:49:10 +00:00
if err != nil {
return err
}
2015-06-01 19:11:40 +00:00
// Create a transport layer
2015-08-26 00:36:52 +00:00
trans := raft . NewNetworkTransport ( s . raftLayer , 3 , s . config . RaftTimeout ,
2015-06-01 19:11:40 +00:00
s . config . LogOutput )
s . raftTransport = trans
2015-06-01 15:49:10 +00:00
2018-09-17 20:57:52 +00:00
// Make sure we set the Logger.
2020-02-11 19:41:22 +00:00
s . config . RaftConfig . Logger = s . logger . Named ( "raft" )
2018-09-17 20:57:52 +00:00
s . config . RaftConfig . LogOutput = nil
2017-02-02 23:31:36 +00:00
2019-09-18 21:45:51 +00:00
// Our version of Raft protocol 2 requires the LocalID to match the network
// address of the transport. Raft protocol 3 uses permanent ids.
2017-02-02 23:31:36 +00:00
s . config . RaftConfig . LocalID = raft . ServerID ( trans . LocalAddr ( ) )
2017-11-22 00:29:11 +00:00
if s . config . RaftConfig . ProtocolVersion >= 3 {
s . config . RaftConfig . LocalID = raft . ServerID ( s . config . NodeID )
}
2017-02-02 23:31:36 +00:00
// Build an all in-memory setup for dev mode, otherwise prepare a full
// disk-based setup.
2015-06-01 19:11:40 +00:00
var log raft . LogStore
var stable raft . StableStore
var snap raft . SnapshotStore
if s . config . DevMode {
store := raft . NewInmemStore ( )
s . raftInmem = store
stable = store
log = store
snap = raft . NewDiscardSnapshotStore ( )
} else {
// Create the base raft path
path := filepath . Join ( s . config . DataDir , raftState )
if err := ensurePath ( path , true ) ; err != nil {
return err
}
2015-06-01 15:49:10 +00:00
2015-06-01 19:11:40 +00:00
// Create the BoltDB backend
store , err := raftboltdb . NewBoltStore ( filepath . Join ( path , "raft.db" ) )
if err != nil {
return err
}
s . raftStore = store
stable = store
2015-06-01 15:49:10 +00:00
2015-06-01 19:11:40 +00:00
// Wrap the store in a LogCache to improve performance
cacheStore , err := raft . NewLogCache ( raftLogCacheSize , store )
if err != nil {
store . Close ( )
return err
}
log = cacheStore
2015-06-01 15:49:10 +00:00
2015-06-01 19:11:40 +00:00
// Create the snapshot store
snapshots , err := raft . NewFileSnapshotStore ( path , snapshotsRetained , s . config . LogOutput )
if err != nil {
if s . raftStore != nil {
s . raftStore . Close ( )
}
return err
}
snap = snapshots
2015-06-01 15:49:10 +00:00
2017-02-02 23:31:36 +00:00
// For an existing cluster being upgraded to the new version of
// Raft, we almost never want to run recovery based on the old
// peers.json file. We create a peers.info file with a helpful
// note about where peers.json went, and use that as a sentinel
// to avoid ingesting the old one that first time (if we have to
// create the peers.info file because it's not there, we also
// blow away any existing peers.json file).
peersFile := filepath . Join ( path , "peers.json" )
peersInfoFile := filepath . Join ( path , "peers.info" )
if _ , err := os . Stat ( peersInfoFile ) ; os . IsNotExist ( err ) {
if err := ioutil . WriteFile ( peersInfoFile , [ ] byte ( peersInfoContent ) , 0755 ) ; err != nil {
return fmt . Errorf ( "failed to write peers.info file: %v" , err )
}
// Blow away the peers.json file if present, since the
// peers.info sentinel wasn't there.
if _ , err := os . Stat ( peersFile ) ; err == nil {
if err := os . Remove ( peersFile ) ; err != nil {
return fmt . Errorf ( "failed to delete peers.json, please delete manually (see peers.info for details): %v" , err )
}
2018-09-15 23:23:13 +00:00
s . logger . Info ( "deleted peers.json file (see peers.info for details)" )
2017-02-02 23:31:36 +00:00
}
} else if _ , err := os . Stat ( peersFile ) ; err == nil {
2018-09-15 23:23:13 +00:00
s . logger . Info ( "found peers.json file, recovering Raft configuration..." )
2019-04-29 23:38:27 +00:00
var configuration raft . Configuration
if s . config . RaftConfig . ProtocolVersion < 3 {
configuration , err = raft . ReadPeersJSON ( peersFile )
} else {
configuration , err = raft . ReadConfigJSON ( peersFile )
}
2017-02-02 23:31:36 +00:00
if err != nil {
return fmt . Errorf ( "recovery failed to parse peers.json: %v" , err )
}
2017-10-13 21:36:02 +00:00
tmpFsm , err := NewFSM ( fsmConfig )
2017-02-02 23:31:36 +00:00
if err != nil {
return fmt . Errorf ( "recovery failed to make temp FSM: %v" , err )
}
if err := raft . RecoverCluster ( s . config . RaftConfig , tmpFsm ,
log , stable , snap , trans , configuration ) ; err != nil {
return fmt . Errorf ( "recovery failed: %v" , err )
}
if err := os . Remove ( peersFile ) ; err != nil {
return fmt . Errorf ( "recovery failed to delete peers.json, please delete manually (see peers.info for details): %v" , err )
}
2018-09-15 23:23:13 +00:00
s . logger . Info ( "deleted peers.json file after successful recovery" )
2017-02-02 23:31:36 +00:00
}
2015-06-01 19:11:40 +00:00
}
2015-06-01 15:49:10 +00:00
2020-03-02 15:29:24 +00:00
// If we are a single server cluster and the state is clean then we can
2017-02-02 23:31:36 +00:00
// bootstrap now.
2020-03-02 15:29:24 +00:00
if s . isSingleServerCluster ( ) {
2017-02-02 23:31:36 +00:00
hasState , err := raft . HasExistingState ( log , stable , snap )
2015-06-01 15:49:10 +00:00
if err != nil {
return err
}
2017-02-02 23:31:36 +00:00
if ! hasState {
configuration := raft . Configuration {
Servers : [ ] raft . Server {
2017-09-26 22:26:33 +00:00
{
2017-11-22 00:29:11 +00:00
ID : s . config . RaftConfig . LocalID ,
2017-02-02 23:31:36 +00:00
Address : trans . LocalAddr ( ) ,
} ,
} ,
}
if err := raft . BootstrapCluster ( s . config . RaftConfig ,
log , stable , snap , trans , configuration ) ; err != nil {
return err
}
2015-06-01 15:49:10 +00:00
}
}
// Setup the Raft store
2017-02-02 23:31:36 +00:00
s . raft , err = raft . NewRaft ( s . config . RaftConfig , s . fsm , log , stable , snap , trans )
2015-06-01 15:49:10 +00:00
if err != nil {
return err
}
return nil
}
2015-06-03 10:26:50 +00:00
2015-06-03 10:58:00 +00:00
// setupSerf is used to setup and initialize a Serf
func ( s * Server ) setupSerf ( conf * serf . Config , ch chan serf . Event , path string ) ( * serf . Serf , error ) {
conf . Init ( )
conf . NodeName = fmt . Sprintf ( "%s.%s" , s . config . NodeName , s . config . Region )
conf . Tags [ "role" ] = "nomad"
conf . Tags [ "region" ] = s . config . Region
conf . Tags [ "dc" ] = s . config . Datacenter
2016-05-28 01:14:34 +00:00
conf . Tags [ "vsn" ] = fmt . Sprintf ( "%d" , structs . ApiMajorVersion )
conf . Tags [ "mvn" ] = fmt . Sprintf ( "%d" , structs . ApiMinorVersion )
2015-06-03 10:58:00 +00:00
conf . Tags [ "build" ] = s . config . Build
2017-11-22 00:29:11 +00:00
conf . Tags [ "raft_vsn" ] = fmt . Sprintf ( "%d" , s . config . RaftConfig . ProtocolVersion )
conf . Tags [ "id" ] = s . config . NodeID
2018-03-16 22:24:49 +00:00
conf . Tags [ "rpc_addr" ] = s . clientRpcAdvertise . ( * net . TCPAddr ) . IP . String ( ) // Address that clients will use to RPC to servers
conf . Tags [ "port" ] = fmt . Sprintf ( "%d" , s . serverRpcAdvertise . ( * net . TCPAddr ) . Port ) // Port servers use to RPC to one and another
2020-03-02 15:29:24 +00:00
if s . isSingleServerCluster ( ) {
2015-06-03 10:58:00 +00:00
conf . Tags [ "bootstrap" ] = "1"
}
2020-03-02 15:29:24 +00:00
bootstrapExpect := s . config . BootstrapExpect
2016-06-16 19:00:15 +00:00
if bootstrapExpect != 0 {
conf . Tags [ "expect" ] = fmt . Sprintf ( "%d" , bootstrapExpect )
2015-06-03 10:58:00 +00:00
}
2017-12-18 21:16:23 +00:00
if s . config . NonVoter {
conf . Tags [ "nonvoter" ] = "1"
}
2018-01-30 03:53:34 +00:00
if s . config . RedundancyZone != "" {
conf . Tags [ AutopilotRZTag ] = s . config . RedundancyZone
}
if s . config . UpgradeVersion != "" {
conf . Tags [ AutopilotVersionTag ] = s . config . UpgradeVersion
}
2019-10-24 20:55:23 +00:00
logger := s . logger . StandardLoggerIntercept ( & log . StandardLoggerOptions { InferLevels : true } )
2018-09-17 20:57:52 +00:00
conf . MemberlistConfig . Logger = logger
conf . Logger = logger
conf . MemberlistConfig . LogOutput = nil
conf . LogOutput = nil
2015-06-03 10:58:00 +00:00
conf . EventCh = ch
2015-06-04 10:33:12 +00:00
if ! s . config . DevMode {
conf . SnapshotPath = filepath . Join ( s . config . DataDir , path )
if err := ensurePath ( conf . SnapshotPath , false ) ; err != nil {
return nil , err
}
}
2015-06-03 10:58:00 +00:00
conf . ProtocolVersion = protocolVersionMap [ s . config . ProtocolVersion ]
conf . RejoinAfterLeave = true
2018-05-10 16:16:24 +00:00
// LeavePropagateDelay is used to make sure broadcasted leave intents propagate
// This value was tuned using https://www.serf.io/docs/internals/simulator.html to
// allow for convergence in 99.9% of nodes in a 10 node cluster
conf . LeavePropagateDelay = 1 * time . Second
2015-06-03 10:58:00 +00:00
conf . Merge = & serfMergeDelegate { }
// Until Nomad supports this fully, we disable automatic resolution.
// When enabled, the Serf gossip may just turn off if we are the minority
// node which is rather unexpected.
conf . EnableNameConflictResolution = false
return serf . Create ( conf )
}
2015-07-28 22:12:08 +00:00
// setupWorkers is used to start the scheduling workers
func ( s * Server ) setupWorkers ( ) error {
// Check if all the schedulers are disabled
if len ( s . config . EnabledSchedulers ) == 0 || s . config . NumSchedulers == 0 {
2018-09-15 23:23:13 +00:00
s . logger . Warn ( "no enabled schedulers" )
2015-07-28 22:12:08 +00:00
return nil
}
2018-03-14 17:37:49 +00:00
// Check if the core scheduler is not enabled
2018-03-14 17:53:27 +00:00
foundCore := false
for _ , sched := range s . config . EnabledSchedulers {
if sched == structs . JobTypeCore {
foundCore = true
continue
}
if _ , ok := scheduler . BuiltinSchedulers [ sched ] ; ! ok {
return fmt . Errorf ( "invalid configuration: unknown scheduler %q in enabled schedulers" , sched )
2018-03-14 17:37:49 +00:00
}
}
2018-03-14 17:53:27 +00:00
if ! foundCore {
return fmt . Errorf ( "invalid configuration: %q scheduler not enabled" , structs . JobTypeCore )
2018-03-14 17:37:49 +00:00
}
2015-07-28 22:12:08 +00:00
// Start the workers
for i := 0 ; i < s . config . NumSchedulers ; i ++ {
2015-08-23 17:53:53 +00:00
if w , err := NewWorker ( s ) ; err != nil {
2015-07-28 22:12:08 +00:00
return err
2015-08-23 17:53:53 +00:00
} else {
s . workers = append ( s . workers , w )
2015-07-28 22:12:08 +00:00
}
}
2018-09-15 23:23:13 +00:00
s . logger . Info ( "starting scheduling worker(s)" , "num_workers" , s . config . NumSchedulers , "schedulers" , s . config . EnabledSchedulers )
2015-07-28 22:12:08 +00:00
return nil
}
2017-02-02 21:52:31 +00:00
// numPeers is used to check on the number of known peers, including the local
// node.
func ( s * Server ) numPeers ( ) ( int , error ) {
future := s . raft . GetConfiguration ( )
if err := future . Error ( ) ; err != nil {
2015-06-03 11:25:50 +00:00
return 0 , err
}
2017-02-02 21:52:31 +00:00
configuration := future . Configuration ( )
return len ( configuration . Servers ) , nil
2015-06-03 11:25:50 +00:00
}
2015-06-03 10:26:50 +00:00
// IsLeader reports whether the local Raft state is raft.Leader, i.e. whether
// this server currently considers itself the cluster leader.
func (s *Server) IsLeader() bool {
	current := s.raft.State()
	return current == raft.Leader
}
2015-06-03 11:25:50 +00:00
// Join asks Serf to join the gossip ring through the given addresses. Each
// address should be another node listening on its Serf address. The count and
// error are passed through unchanged from serf.Join.
func (s *Server) Join(addrs []string) (int, error) {
	// NOTE(review): the literal true is presumably serf's "ignore old events"
	// flag — confirm against the serf API.
	joined, err := s.serf.Join(addrs, true)
	return joined, err
}
// LocalMember is used to return the local node
2019-03-16 22:53:53 +00:00
func ( s * Server ) LocalMember ( ) serf . Member {
return s . serf . LocalMember ( )
2015-06-03 11:25:50 +00:00
}
// Members returns the member list reported by the Serf cluster.
func (s *Server) Members() []serf.Member {
	members := s.serf.Members()
	return members
}
// RemoveFailedNode asks Serf to remove the named failed node from the cluster
// and returns whatever error Serf reports.
func (s *Server) RemoveFailedNode(node string) error {
	err := s.serf.RemoveFailedNode(node)
	return err
}
// KeyManager exposes the keyring manager of the underlying Serf instance.
func (s *Server) KeyManager() *serf.KeyManager {
	manager := s.serf.KeyManager()
	return manager
}
// Encrypted reports whether Serf has gossip encryption enabled.
func (s *Server) Encrypted() bool {
	enabled := s.serf.EncryptionEnabled()
	return enabled
}
2015-08-29 21:22:24 +00:00
// State returns the state store held by the FSM. Callers must treat it as
// read-only: it should *not* be used to modify state directly.
func (s *Server) State() *state.StateStore {
	store := s.fsm.State()
	return store
}
2017-10-23 19:50:37 +00:00
// setLeaderAcl records token as the current leader's ACL token. The write is
// performed while holding leaderAclLock.
func (s *Server) setLeaderAcl(token string) {
	s.leaderAclLock.Lock()
	defer s.leaderAclLock.Unlock()

	s.leaderAcl = token
}
// getLeaderAcl returns the stored leader ACL token. The read is performed
// while holding leaderAclLock.
func (s *Server) getLeaderAcl() string {
	s.leaderAclLock.Lock()
	token := s.leaderAcl
	s.leaderAclLock.Unlock()

	return token
}
2019-07-02 07:58:02 +00:00
// setConsistentReadReady atomically sets the readiness flag to 1. Per the
// original note it is meant to be called once leadership is obtained, to
// indicate that the server is past its barrier write.
func (s *Server) setConsistentReadReady() {
	const ready int32 = 1
	atomic.StoreInt32(&s.readyForConsistentReads, ready)
}
// resetConsistentReadReady atomically clears the readiness flag back to 0.
// Per the original note it is meant to be called when leadership is revoked.
func (s *Server) resetConsistentReadReady() {
	const notReady int32 = 0
	atomic.StoreInt32(&s.readyForConsistentReads, notReady)
}
// isReadyForConsistentReads reports whether the readiness flag is currently
// set (atomically loaded value equals 1), meaning this server is ready to
// serve consistent reads.
func (s *Server) isReadyForConsistentReads() bool {
	flag := atomic.LoadInt32(&s.readyForConsistentReads)
	return flag == 1
}
2015-11-24 05:47:11 +00:00
// Regions returns the known regions in the cluster.
func ( s * Server ) Regions ( ) [ ] string {
2015-11-24 05:49:03 +00:00
s . peerLock . RLock ( )
defer s . peerLock . RUnlock ( )
2015-11-24 05:47:11 +00:00
regions := make ( [ ] string , 0 , len ( s . peers ) )
2017-09-26 22:26:33 +00:00
for region := range s . peers {
2015-11-24 05:47:11 +00:00
regions = append ( regions , region )
}
2015-11-24 21:15:01 +00:00
sort . Strings ( regions )
2015-11-24 05:47:11 +00:00
return regions
}
2015-06-03 10:26:50 +00:00
// RPC is used to make a local RPC call
func ( s * Server ) RPC ( method string , args interface { } , reply interface { } ) error {
2018-01-11 18:17:23 +00:00
codec := & codec . InmemCodec {
Method : method ,
Args : args ,
Reply : reply ,
2015-06-03 10:26:50 +00:00
}
if err := s . rpcServer . ServeRequest ( codec ) ; err != nil {
return err
}
2018-01-11 18:17:23 +00:00
return codec . Err
2015-06-03 10:26:50 +00:00
}
2018-01-22 01:09:20 +00:00
// StreamingRpcHandler looks up the streaming RPC handler registered for method
// and returns it together with any lookup error.
func (s *Server) StreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	handler, err := s.streamingRpcs.GetHandler(method)
	return handler, err
}
2015-06-03 10:26:50 +00:00
// Stats is used to return statistics for debugging and insight
// for various sub-systems
func ( s * Server ) Stats ( ) map [ string ] map [ string ] string {
toString := func ( v uint64 ) string {
return strconv . FormatUint ( v , 10 )
}
stats := map [ string ] map [ string ] string {
2017-09-26 22:26:33 +00:00
"nomad" : {
2015-06-03 10:26:50 +00:00
"server" : "true" ,
"leader" : fmt . Sprintf ( "%v" , s . IsLeader ( ) ) ,
2017-02-03 00:07:15 +00:00
"leader_addr" : string ( s . raft . Leader ( ) ) ,
2020-03-02 15:29:24 +00:00
"bootstrap" : fmt . Sprintf ( "%v" , s . isSingleServerCluster ( ) ) ,
2015-08-17 00:40:35 +00:00
"known_regions" : toString ( uint64 ( len ( s . peers ) ) ) ,
2015-06-03 10:26:50 +00:00
} ,
"raft" : s . raft . Stats ( ) ,
2015-06-03 11:08:04 +00:00
"serf" : s . serf . Stats ( ) ,
2018-01-12 21:58:44 +00:00
"runtime" : stats . RuntimeStats ( ) ,
2018-11-02 20:28:51 +00:00
"vault" : s . vault . Stats ( ) ,
2015-06-03 10:26:50 +00:00
}
2017-02-02 21:52:31 +00:00
2015-06-03 10:26:50 +00:00
return stats
}
2016-10-17 17:48:04 +00:00
2019-06-17 20:51:31 +00:00
// EmitRaftStats is used to export metrics about raft indexes and state store snapshot index
2019-06-14 21:30:27 +00:00
func ( s * Server ) EmitRaftStats ( period time . Duration , stopCh <- chan struct { } ) {
for {
select {
case <- time . After ( period ) :
2019-06-17 20:51:31 +00:00
lastIndex := s . raft . LastIndex ( )
metrics . SetGauge ( [ ] string { "raft" , "lastIndex" } , float32 ( lastIndex ) )
2019-06-14 21:30:27 +00:00
appliedIndex := s . raft . AppliedIndex ( )
metrics . SetGauge ( [ ] string { "raft" , "appliedIndex" } , float32 ( appliedIndex ) )
stateStoreSnapshotIndex , err := s . State ( ) . LatestIndex ( )
if err != nil {
s . logger . Warn ( "Unable to read snapshot index from statestore, metric will not be emitted" , "error" , err )
} else {
metrics . SetGauge ( [ ] string { "state" , "snapshotIndex" } , float32 ( stateStoreSnapshotIndex ) )
}
case <- stopCh :
return
}
}
}
2017-08-07 21:13:05 +00:00
// Region returns the region of the server
2016-10-17 17:48:04 +00:00
func ( s * Server ) Region ( ) string {
return s . config . Region
}
// Datacenter returns the datacenter name from the server's configuration.
func (s *Server) Datacenter() string {
	dc := s.config.Datacenter
	return dc
}
// GetConfig returns the server's configuration pointer (not a copy). It is
// intended for testing purposes only.
func (s *Server) GetConfig() *Config {
	cfg := s.config
	return cfg
}
2017-02-02 23:31:36 +00:00
2017-08-21 03:51:30 +00:00
// ReplicationToken returns the replication token from the server's
// configuration. It is exposed as a method so that dynamic reloading of the
// value can be supported later.
func (s *Server) ReplicationToken() string {
	token := s.config.ReplicationToken
	return token
}
2019-11-14 13:18:29 +00:00
// ClusterID returns the unique ID for this cluster.
//
// Any Nomad server agent may call this method to get at the ID.
// If we are the leader and the ID has not yet been created, it will
// be created now. Otherwise an error is returned.
//
// The ID will not be created until all participating servers have reached
2020-01-31 02:21:01 +00:00
// a minimum version (0.10.4).
2019-11-14 13:18:29 +00:00
func ( s * Server ) ClusterID ( ) ( string , error ) {
s . clusterIDLock . Lock ( )
defer s . clusterIDLock . Unlock ( )
// try to load the cluster ID from state store
fsmState := s . fsm . State ( )
2020-06-26 17:23:32 +00:00
existingMeta , err := fsmState . ClusterMetadata ( nil )
2019-11-14 13:18:29 +00:00
if err != nil {
s . logger . Named ( "core" ) . Error ( "failed to get cluster ID" , "error" , err )
return "" , err
}
// got the cluster ID from state store, cache that and return it
if existingMeta != nil && existingMeta . ClusterID != "" {
return existingMeta . ClusterID , nil
}
// if we are not the leader, nothing more we can do
if ! s . IsLeader ( ) {
return "" , errors . New ( "cluster ID not ready yet" )
}
// we are the leader, try to generate the ID now
generatedID , err := s . generateClusterID ( )
if err != nil {
return "" , err
}
return generatedID , nil
}
2020-03-02 15:29:24 +00:00
// isSingleServerCluster reports whether the server is configured with a
// BootstrapExpect of exactly 1.
func (s *Server) isSingleServerCluster() bool {
	expected := s.config.BootstrapExpect
	return expected == 1
}
2017-02-02 23:31:36 +00:00
// peersInfoContent is used to help operators understand what happened to the
// peers.json file. This is written to a file called peers.info in the same
// location.
//
// The text is emitted verbatim, so it must stay free of stray markup and keep
// its URLs and file names intact.
const peersInfoContent = `
As of Nomad 0.5.5, the peers.json file is only used for recovery
after an outage. The format of this file depends on what the server has
configured for its Raft protocol version. Please see the agent configuration
page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
details about this parameter.

For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Nomad server in the cluster, like
this:

[
  "10.1.0.1:8300",
  "10.1.0.2:8300",
  "10.1.0.3:8300"
]

For Raft protocol version 3 and later, this should be formatted as a JSON
array containing the node ID, address:port, and suffrage information of each
Nomad server in the cluster, like this:

[
  {
    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
    "address": "10.1.0.1:8300",
    "non_voter": false
  },
  {
    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.2:8300",
    "non_voter": false
  },
  {
    "id": "97e17742-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.3:8300",
    "non_voter": false
  }
]

The "id" field is the node ID of the server. This can be found in the logs when
the server starts up, or in the "node-id" file inside the server's data
directory.

The "address" field is the address and port of the server.

The "non_voter" field controls whether the server is a non-voter, which is used
in some advanced Autopilot configurations, please see
https://www.nomadproject.io/guides/operations/outage.html for more information. If
"non_voter" is omitted it will default to false, which is typical for most
clusters.

Under normal operation, the peers.json file will not be present.

When Nomad starts for the first time, it will create this peers.info file and
delete any existing peers.json file so that recovery doesn't occur on the first
startup.

Once this peers.info file is present, any peers.json file will be ingested at
startup, and will set the Raft peer configuration manually to recover from an
outage. It's crucial that all servers in the cluster are shut down before
creating the peers.json file, and that all servers receive the same
configuration. Once the peers.json file is successfully ingested and applied, it
will be deleted.

Please see https://www.nomadproject.io/guides/outage.html for more information.
`