// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package nomad

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/rpc"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/armon/go-metrics"
	consulapi "github.com/hashicorp/consul/api"
	log "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/raft"
	autopilot "github.com/hashicorp/raft-autopilot"
	raftboltdb "github.com/hashicorp/raft-boltdb/v2"
	"github.com/hashicorp/serf/serf"
	"go.etcd.io/bbolt"

	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/codec"
	"github.com/hashicorp/nomad/helper/pool"
	"github.com/hashicorp/nomad/helper/stats"
	"github.com/hashicorp/nomad/helper/tlsutil"
	"github.com/hashicorp/nomad/lib/auth/oidc"
	"github.com/hashicorp/nomad/nomad/deploymentwatcher"
	"github.com/hashicorp/nomad/nomad/drainer"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/nomad/volumewatcher"
	"github.com/hashicorp/nomad/scheduler"
)

const (
	// datacenterQueryLimit sets the max number of DCs that a Nomad
	// Server will query to find bootstrap_expect servers.
	datacenterQueryLimit = 25

	// maxStaleLeadership is the maximum time we will permit this Nomad
	// Server to go without seeing a valid Raft leader.
	maxStaleLeadership = 15 * time.Second

	// peersPollInterval is used as the polling interval between attempts
	// to query Consul for Nomad Servers.
	peersPollInterval = 45 * time.Second

	// peersPollJitterFactor is used to provide a slight amount of variance to
	// the retry interval when querying Consul Servers.
	peersPollJitterFactor = 2

	raftState         = "raft/"
	serfSnapshot      = "serf/snapshot"
	snapshotsRetained = 2

	// serverRPCCache controls how long we keep an idle connection open to a server
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams controls how many idle streams we keep open to a server
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs to cache in-memory.
	// This is used to reduce disk I/O for the recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
	// to replicate to gracefully leave the cluster.
	raftRemoveGracePeriod = 5 * time.Second

	// defaultConsulDiscoveryInterval is how often to poll Consul for new
	// servers if there is no leader.
	defaultConsulDiscoveryInterval time.Duration = 3 * time.Second

	// defaultConsulDiscoveryIntervalRetry is how often to poll Consul for
	// new servers if there is no leader and the last Consul query failed.
	defaultConsulDiscoveryIntervalRetry time.Duration = 9 * time.Second

	// aclCacheSize is the number of ACL objects to keep cached. ACLs have a parsing and
	// construction cost, so we keep the hot objects cached to reduce the ACL token resolution time.
	aclCacheSize = 512
)
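
// Editorial note (not part of the upstream source): the Consul fallback
// handler later in this file schedules its retries as
//
//	peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor)
//
// so with the values above each retry fires 45 seconds plus a random stagger
// of up to 22.5 seconds after the previous attempt.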

// Server is the Nomad server which manages the job queues,
// schedulers, and notification bus for agents.
type Server struct {
	config *Config

	logger log.InterceptLogger

	// Connection pool to other Nomad servers
	connPool *pool.ConnPool

	// The raft instance is used among Nomad nodes within the
	// region to protect operations that require strong consistency
	raft          *raft.Raft
	raftLayer     *RaftLayer
	raftStore     *raftboltdb.BoltStore
	raftInmem     *raft.InmemStore
	raftTransport *raft.NetworkTransport

	// reassertLeaderCh is used to signal that the leader loop must
	// re-establish leadership.
	//
	// This might be relevant in snapshot restores, where leader in-memory
	// state changed significantly such that leader state (e.g. periodic
	// jobs, eval brokers) need to be recomputed.
	reassertLeaderCh chan chan error

	// autopilot is the Autopilot instance for this server.
	autopilot *autopilot.Autopilot

	// fsm is the state machine used with Raft
	fsm *nomadFSM

	// rpcListener is used to listen for incoming connections
	rpcListener net.Listener
	listenerCh  chan struct{}

	// tlsWrap is used to wrap outbound connections using TLS. It should be
	// accessed using the lock.
	tlsWrap     tlsutil.RegionWrapper
	tlsWrapLock sync.RWMutex

	// TODO(alex,hclog): Can I move more into the handler?
	// rpcHandler is used to serve and handle RPCs
	*rpcHandler

	// rpcServer is the static RPC server that is used by the local agent.
	rpcServer *rpc.Server

	// clientRpcAdvertise is the advertised RPC address for Nomad clients to connect
	// to this server
	clientRpcAdvertise net.Addr

	// serverRpcAdvertise is the advertised RPC address for Nomad servers to connect
	// to this server
	serverRpcAdvertise net.Addr

	// rpcTLS is the TLS config for incoming TLS requests
	rpcTLS    *tls.Config
	rpcCancel context.CancelFunc

	// streamingRpcs is the registry holding our streaming RPC handlers.
	streamingRpcs *structs.StreamingRpcRegistry

	// nodeConns is the set of multiplexed node connections we have keyed by
	// NodeID
	nodeConns     map[string][]*nodeConnState
	nodeConnsLock sync.RWMutex

	// peers is used to track the known Nomad servers. This is
	// used for region forwarding and clustering.
	peers      map[string][]*serverParts
	localPeers map[raft.ServerAddress]*serverParts
	peerLock   sync.RWMutex

	// serf is the Serf cluster containing only Nomad
	// servers. This is used for multi-region federation
	// and automatic clustering within regions.
	serf *serf.Serf

	// bootstrapped indicates if Server has bootstrapped or not.
	bootstrapped *atomic.Bool

	// reconcileCh is used to pass events from the serf handler
	// into the leader manager. Mostly used to handle when servers
	// join/leave from the region.
	reconcileCh chan serf.Member

	// used to track when the server is ready to serve consistent reads, updated atomically
	readyForConsistentReads *atomic.Bool

	// eventCh is used to receive events from the serf cluster
	eventCh chan serf.Event

	// BlockedEvals is used to manage evaluations that are blocked on node
	// capacity changes.
	blockedEvals *BlockedEvals

	// evalBroker is used to manage the in-progress evaluations
	// that are waiting to be brokered to a sub-scheduler
	evalBroker *EvalBroker

	// brokerLock is used to synchronise the alteration of the blockedEvals and
	// evalBroker enabled state. These two subsystems change state when
	// leadership changes or when the user modifies the setting via the
	// operator scheduler configuration. This lock allows these actions to be
	// performed safely, without potential for user interactions and leadership
	// transitions to collide and create inconsistent state.
	brokerLock sync.Mutex

	// reapCancelableEvalsCh is used to signal the cancelable evals reaper to wake up
	reapCancelableEvalsCh chan struct{}

	// deploymentWatcher is used to watch deployments and their allocations and
	// make the required calls to continue to transition the deployment.
	deploymentWatcher *deploymentwatcher.Watcher

	// nodeDrainer is used to drain allocations from nodes.
	nodeDrainer *drainer.NodeDrainer

	// volumeWatcher is used to release volume claims
	volumeWatcher *volumewatcher.Watcher

	// keyringReplicator is used to replicate root encryption keys from the
	// leader
	keyringReplicator *KeyringReplicator

	// encrypter is the root keyring for encrypting variables and signing
	// workload identities
	encrypter *Encrypter

	// periodicDispatcher is used to track and create evaluations for periodic jobs.
	periodicDispatcher *PeriodicDispatch

	// planner is used to manage the submitted allocation plans that are waiting
	// to be accessed by the leader
	*planner

	// nodeHeartbeater is used to track expiration times of node heartbeats. If it
	// detects an expired node, the node status is updated to be 'down'.
	*nodeHeartbeater

	// consulCatalog is used for discovering other Nomad Servers via Consul
	consulCatalog consul.CatalogAPI

	// consulConfigEntries is used for managing Consul Configuration Entries.
	consulConfigEntries ConsulConfigsAPI

	// consulACLs is used for managing Consul Service Identity tokens.
	consulACLs ConsulACLsAPI

	// vault is the client for communicating with Vault.
	vault VaultClient

	// Worker used for processing
	workers          []*Worker
	workerLock       sync.RWMutex
	workerConfigLock sync.RWMutex
	workersEventCh   chan interface{}

	// aclCache is used to maintain the parsed ACL objects
	aclCache *structs.ACLCache[*acl.ACL]

	// oidcProviderCache maintains a cache of OIDC providers. This is useful as
	// the provider performs background HTTP requests. When the Nomad server is
	// shutting down, the oidcProviderCache.Shutdown() function must be called.
	oidcProviderCache *oidc.ProviderCache

	// leaderAcl is the management ACL token that is valid when resolved by the
	// current leader.
	leaderAcl     string
	leaderAclLock sync.Mutex

	// clusterIDLock ensures the server does not try to concurrently establish
	// a cluster ID, racing against itself in calls of ClusterID
	clusterIDLock sync.Mutex

	// statsFetcher is used by autopilot to check the status of the other
	// Nomad servers.
	statsFetcher *StatsFetcher

	// EnterpriseState is used to fill in state for Pro/Ent builds
	EnterpriseState

	left         bool
	shutdown     bool
	shutdownLock sync.Mutex

	shutdownCtx    context.Context
	shutdownCancel context.CancelFunc
	shutdownCh     <-chan struct{}
}

// Holds the RPC endpoints
type endpoints struct {
	Status              *Status
	Node                *Node
	Job                 *Job
	CSIVolume           *CSIVolume
	CSIPlugin           *CSIPlugin
	Deployment          *Deployment
	Region              *Region
	Search              *Search
	Periodic            *Periodic
	System              *System
	Operator            *Operator
	ACL                 *ACL
	Scaling             *Scaling
	Enterprise          *EnterpriseEndpoints
	Event               *Event
	Namespace           *Namespace
	Variables           *Variables
	Keyring             *Keyring
	ServiceRegistration *ServiceRegistration

	// Client endpoints
	ClientStats       *ClientStats
	FileSystem        *FileSystem
	Agent             *Agent
	ClientAllocations *ClientAllocations
	ClientCSI         *ClientCSI
}

// NewServer is used to construct a new Nomad server from the
// configuration, potentially returning an error
func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntries consul.ConfigAPI, consulACLs consul.ACLsAPI) (*Server, error) {

	// Create an eval broker
	evalBroker, err := NewEvalBroker(
		config.EvalNackTimeout,
		config.EvalNackInitialReenqueueDelay,
		config.EvalNackSubsequentReenqueueDelay,
		config.EvalDeliveryLimit)
	if err != nil {
		return nil, err
	}

	// Configure TLS
	tlsConf, err := tlsutil.NewTLSConfiguration(config.TLSConfig, true, true)
	if err != nil {
		return nil, err
	}

	incomingTLS, tlsWrap, err := getTLSConf(config.TLSConfig.EnableRPC, tlsConf, config.Region)
	if err != nil {
		return nil, err
	}

	// Create the ACL object cache
	aclCache := structs.NewACLCache[*acl.ACL](aclCacheSize)

	// Create the logger
	logger := config.Logger.ResetNamedIntercept("nomad")

	// Validate enterprise license before anything stateful happens
	if err = config.LicenseConfig.Validate(); err != nil {
		return nil, err
	}

	// Create the server
	s := &Server{
		config:                  config,
		consulCatalog:           consulCatalog,
		connPool:                pool.NewPool(logger, serverRPCCache, serverMaxStreams, tlsWrap),
		logger:                  logger,
		tlsWrap:                 tlsWrap,
		rpcServer:               rpc.NewServer(),
		streamingRpcs:           structs.NewStreamingRpcRegistry(),
		nodeConns:               make(map[string][]*nodeConnState),
		peers:                   make(map[string][]*serverParts),
		localPeers:              make(map[raft.ServerAddress]*serverParts),
		bootstrapped:            &atomic.Bool{},
		reassertLeaderCh:        make(chan chan error),
		reconcileCh:             make(chan serf.Member, 32),
		readyForConsistentReads: &atomic.Bool{},
		eventCh:                 make(chan serf.Event, 256),
		evalBroker:              evalBroker,
		reapCancelableEvalsCh:   make(chan struct{}),
		blockedEvals:            NewBlockedEvals(evalBroker, logger),
		rpcTLS:                  incomingTLS,
		aclCache:                aclCache,
		workersEventCh:          make(chan interface{}, 1),
	}

	s.shutdownCtx, s.shutdownCancel = context.WithCancel(context.Background())
	s.shutdownCh = s.shutdownCtx.Done()

	// Create the RPC handler
	s.rpcHandler = newRpcHandler(s)

	// Create the planner
	planner, err := newPlanner(s)
	if err != nil {
		return nil, err
	}
	s.planner = planner

	// Create the node heartbeater
	s.nodeHeartbeater = newNodeHeartbeater(s)

	// Create the periodic dispatcher for launching periodic jobs.
	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)

	// Initialize the stats fetcher that autopilot will use.
	s.statsFetcher = NewStatsFetcher(s.logger, s.connPool, s.config.Region)

	// Setup Consul (more)
	s.setupConsul(consulConfigEntries, consulACLs)

	// Setup Vault
	if err := s.setupVaultClient(); err != nil {
		s.Shutdown()
		s.logger.Error("failed to setup Vault client", "error", err)
		return nil, fmt.Errorf("Failed to setup Vault client: %v", err)
	}

	// Set up the keyring
	keystorePath := filepath.Join(s.config.DataDir, "keystore")
	if s.config.DevMode && s.config.DataDir == "" {
		keystorePath, err = os.MkdirTemp("", "nomad-keystore")
		if err != nil {
			return nil, fmt.Errorf("Failed to create keystore tempdir")
		}
	}
	encrypter, err := NewEncrypter(s, keystorePath)
	if err != nil {
		return nil, err
	}
	s.encrypter = encrypter

	// Set up the OIDC provider cache. This is needed by the setupRPC, but must
	// be done separately so that the server can stop all background processes
	// when it shuts down itself.
	s.oidcProviderCache = oidc.NewProviderCache()

	// Initialize the RPC layer
	if err := s.setupRPC(tlsWrap); err != nil {
		s.Shutdown()
		s.logger.Error("failed to start RPC layer", "error", err)
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize the Raft server
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		s.logger.Error("failed to start Raft", "error", err)
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	// Initialize the wan Serf
	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
	if err != nil {
		s.Shutdown()
		s.logger.Error("failed to start serf WAN", "error", err)
		return nil, fmt.Errorf("Failed to start serf: %v", err)
	}

	// Initialize the scheduling workers
	if err := s.setupWorkers(s.shutdownCtx); err != nil {
		s.Shutdown()
		s.logger.Error("failed to start workers", "error", err)
		return nil, fmt.Errorf("Failed to start workers: %v", err)
	}

	// Setup the Consul syncer
	if err := s.setupConsulSyncer(); err != nil {
		s.logger.Error("failed to create server consul syncer", "error", err)
		return nil, fmt.Errorf("failed to create server Consul syncer: %v", err)
	}

	// Setup the deployment watcher.
	if err := s.setupDeploymentWatcher(); err != nil {
		s.logger.Error("failed to create deployment watcher", "error", err)
		return nil, fmt.Errorf("failed to create deployment watcher: %v", err)
	}

	// Setup the volume watcher
	if err := s.setupVolumeWatcher(); err != nil {
		s.logger.Error("failed to create volume watcher", "error", err)
		return nil, fmt.Errorf("failed to create volume watcher: %v", err)
	}

	// Start the eval broker notification system so any subscribers can get
	// updates when the broker's SetEnabled is triggered.
	go s.evalBroker.enabledNotifier.Run(s.shutdownCh)

	// Setup the node drainer.
	s.setupNodeDrainer()

	// Setup the enterprise state
	if err := s.setupEnterprise(config); err != nil {
		return nil, err
	}

	// Monitor leadership changes
	go s.monitorLeadership()

	// Start ingesting events for Serf
	go s.serfEventHandler()

	// start the RPC listener for the server
	s.startRPCListener()

	// Emit metrics for the eval broker
	go evalBroker.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the plan queue
	go s.planQueue.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the planner's bad node tracker.
	go s.planner.badNodeTracker.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the blocked eval tracker.
	go s.blockedEvals.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the Vault client.
	go s.vault.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics
	go s.heartbeatStats()

	// Emit raft and state store metrics
	go s.EmitRaftStats(10*time.Second, s.shutdownCh)

	// Start enterprise background workers
	s.startEnterpriseBackground()

	// Enable the keyring replicator on servers; the replicator has to
	// be created before the RPC server and FSM but needs them to
	// exist before it can start.
	s.keyringReplicator = NewKeyringReplicator(s, encrypter)

	// Done
	return s, nil
}
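
// Editorial note (not part of the upstream source): NewServer wires the server
// up in dependency order -- eval broker and TLS material first, then the core
// Server struct, followed by the RPC layer, Raft, Serf, the scheduler workers,
// and finally the background watchers and metrics goroutines. Later steps
// assume the earlier ones succeeded, which is why most failure paths call
// s.Shutdown() before returning.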

// startRPCListener starts the server's RPC listener
func (s *Server) startRPCListener() {
	ctx, cancel := context.WithCancel(context.Background())
	s.rpcCancel = cancel
	go s.listen(ctx)
}

// createRPCListener creates the server's RPC listener
func (s *Server) createRPCListener() (*net.TCPListener, error) {
	s.listenerCh = make(chan struct{})

	listener, err := net.ListenTCP("tcp", s.config.RPCAddr)
	if err != nil {
		s.logger.Error("failed to initialize TLS listener", "error", err)
		return listener, err
	}

	s.rpcListener = listener
	return listener, nil
}

// getTLSConf gets the server's TLS configuration based on the config supplied
// by the operator
func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config, region string) (*tls.Config, tlsutil.RegionWrapper, error) {
	var tlsWrap tlsutil.RegionWrapper
	var incomingTLS *tls.Config
	if !enableRPC {
		return incomingTLS, tlsWrap, nil
	}

	tlsWrap, err := tlsConf.OutgoingTLSWrapper()
	if err != nil {
		return nil, nil, err
	}

	itls, err := tlsConf.IncomingTLSConfig()
	if err != nil {
		return nil, nil, err
	}

	if tlsConf.VerifyServerHostname {
		incomingTLS = itls.Clone()
		incomingTLS.VerifyPeerCertificate = rpcNameAndRegionValidator(region)
	} else {
		incomingTLS = itls
	}
	return incomingTLS, tlsWrap, nil
}

// implements signature of tls.Config.VerifyPeerCertificate which is called
// after the certs have been verified. We'll ignore the raw certs and only
// check the verified certs.
func rpcNameAndRegionValidator(region string) func([][]byte, [][]*x509.Certificate) error {
	return func(_ [][]byte, certificates [][]*x509.Certificate) error {
		if len(certificates) > 0 && len(certificates[0]) > 0 {
			cert := certificates[0][0]
			for _, dnsName := range cert.DNSNames {
				if validateRPCRegionPeer(dnsName, region) {
					return nil
				}
			}
			if validateRPCRegionPeer(cert.Subject.CommonName, region) {
				return nil
			}
		}
		return errors.New("invalid role or region for certificate")
	}
}

func validateRPCRegionPeer(name, region string) bool {
	parts := strings.Split(name, ".")
	if len(parts) < 3 {
		// Invalid SAN
		return false
	}
	if parts[len(parts)-1] != "nomad" {
		// Incorrect service
		return false
	}
	if parts[0] == "client" {
		// Clients may only connect to servers in their region
		return name == "client."+region+".nomad"
	}
	// Servers may connect to any Nomad RPC service for federation.
	return parts[0] == "server"
}
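
// Illustrative example (editorial, not part of the upstream source): with
// region "us-east-1", validateRPCRegionPeer accepts "server.us-west-1.nomad"
// (servers may federate across regions) and "client.us-east-1.nomad", but
// rejects "client.us-west-1.nomad" (wrong region) and "foo.bar" (too few
// labels and wrong trailing service name).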

// reloadTLSConnections updates a server's TLS configuration and reloads RPC
// connections.
func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error {
	s.logger.Info("reloading server connections due to configuration changes")

	// Check if we can reload the RPC listener
	if s.rpcListener == nil || s.rpcCancel == nil {
		s.logger.Warn("unable to reload configuration due to uninitialized rpc listener")
		return fmt.Errorf("can't reload uninitialized RPC listener")
	}

	tlsConf, err := tlsutil.NewTLSConfiguration(newTLSConfig, true, true)
	if err != nil {
		s.logger.Error("unable to create TLS configuration", "error", err)
		return err
	}

	incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf, s.config.Region)
	if err != nil {
		s.logger.Error("unable to reset TLS context", "error", err)
		return err
	}

	// Store the new tls wrapper.
	s.tlsWrapLock.Lock()
	s.tlsWrap = tlsWrap
	s.tlsWrapLock.Unlock()

	// Keeping configuration in sync is important for other places that require
	// access to config information, such as rpc.go, where we decide on what kind
	// of network connections to accept depending on the server configuration
	s.config.TLSConfig = newTLSConfig

	// Kill any old listeners
	s.rpcCancel()

	s.rpcTLS = incomingTLS
	s.connPool.ReloadTLS(tlsWrap)

	if err := s.rpcListener.Close(); err != nil {
		s.logger.Error("unable to close rpc listener", "error", err)
		return err
	}

	// Wait for the old listener to exit
	<-s.listenerCh

	// Create the new listener with the updated TLS config
	listener, err := s.createRPCListener()
	if err != nil {
		listener.Close()
		return err
	}

	// Start the new RPC listener
	s.startRPCListener()

	// Close and reload existing Raft connections
	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
	s.raftLayer.ReloadTLS(wrapper)
	s.raftTransport.CloseStreams()

	s.logger.Debug("finished reloading server connections")
	return nil
}

// Shutdown is used to shutdown the server
func (s *Server) Shutdown() error {
	s.logger.Info("shutting down server")
	s.shutdownLock.Lock()
	defer s.shutdownLock.Unlock()

	if s.shutdown {
		return nil
	}
	s.shutdown = true

	s.shutdownCancel()

	if s.serf != nil {
		s.serf.Shutdown()
	}

	if s.raft != nil {
		s.raftTransport.Close()
		s.raftLayer.Close()
		future := s.raft.Shutdown()
		if err := future.Error(); err != nil {
			s.logger.Warn("error shutting down raft", "error", err)
		}
		if s.raftStore != nil {
			s.raftStore.Close()
		}
	}

	// Shutdown the RPC listener
	if s.rpcListener != nil {
		s.rpcListener.Close()
	}

	// Close the connection pool
	s.connPool.Shutdown()

	// Close the fsm
	if s.fsm != nil {
		s.fsm.Close()
	}

	// Stop Vault token renewal and revocations
	if s.vault != nil {
		s.vault.Stop()
	}

	// Stop the Consul ACLs token revocations
	s.consulACLs.Stop()

	// Stop being able to set Configuration Entries
	s.consulConfigEntries.Stop()

	// Shutdown the OIDC provider cache which contains background resources and
	// processes.
	if s.oidcProviderCache != nil {
		s.oidcProviderCache.Shutdown()
	}

	return nil
}

// IsShutdown checks if the server is shutdown
func (s *Server) IsShutdown() bool {
	select {
	case <-s.shutdownCh:
		return true
	default:
		return false
	}
}

// Leave is used to prepare for a graceful shutdown of the server
func (s *Server) Leave() error {
	s.logger.Info("server starting leave")
	s.left = true

	// Check the number of known peers
	numPeers, err := s.numPeers()
	if err != nil {
		s.logger.Error("failed to check raft peers during leave", "error", err)
		return err
	}

	addr := s.raftTransport.LocalAddr()

	// If we are the current leader, and we have any other peers (cluster has multiple
	// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
	// not the leader, then we should issue our leave intention and wait to be removed
	// for some sane period of time.
	isLeader := s.IsLeader()
	if isLeader && numPeers > 1 {
		minRaftProtocol, err := s.MinRaftProtocol()
		if err != nil {
			return err
		}

		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
			if err := future.Error(); err != nil {
				s.logger.Error("failed to remove ourself as raft peer", "error", err)
			}
		} else {
			future := s.raft.RemovePeer(addr)
			if err := future.Error(); err != nil {
				s.logger.Error("failed to remove ourself as raft peer", "error", err)
			}
		}
	}

	// Leave the gossip pool
	if s.serf != nil {
		if err := s.serf.Leave(); err != nil {
			s.logger.Error("failed to leave Serf cluster", "error", err)
		}
	}

	// If we were not leader, wait to be safely removed from the cluster.
	// We must wait to allow the raft replication to take place, otherwise
	// an immediate shutdown could cause a loss of quorum.
	if !isLeader {
		left := false
		limit := time.Now().Add(raftRemoveGracePeriod)
		for !left && time.Now().Before(limit) {
			// Sleep a while before we check.
			time.Sleep(50 * time.Millisecond)

			// Get the latest configuration.
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				s.logger.Error("failed to get raft configuration", "error", err)
				break
			}

			// See if we are no longer included.
			left = true
			for _, server := range future.Configuration().Servers {
				if server.Address == addr {
					left = false
					break
				}
			}
		}

		// TODO (alexdadgar) With the old Raft library we used to force the
		// peers set to empty when a graceful leave occurred. This would
		// keep voting spam down if the server was restarted, but it was
		// dangerous because the peers was inconsistent with the logs and
		// snapshots, so it wasn't really safe in all cases for the server
		// to become leader. This is now safe, but the log spam is noisy.
		// The next new version of the library will have a "you are not a
		// peer stop it" behavior that should address this. We will have
		// to evaluate during the RC period if this interim situation is
		// not too confusing for operators.
		// TODO (alexdadgar) When we take a later new version of the Raft
		// library it won't try to complete replication, so this peer
		// may not realize that it has been removed. Need to revisit this
		// and the warning here.
		if !left {
			s.logger.Warn("failed to leave raft configuration gracefully, timeout")
		}
	}
	return nil
}

// Reload handles a config reload specific to server-only configuration. Not
// all config fields can handle a reload.
func (s *Server) Reload(newConfig *Config) error {
	if newConfig == nil {
		return fmt.Errorf("Reload given a nil config")
	}

	var mErr multierror.Error

	// Handle the Vault reload. Vault should never be nil but just guard.
	if s.vault != nil {
		if err := s.vault.SetConfig(newConfig.VaultConfig); err != nil {
			_ = multierror.Append(&mErr, err)
		}
	}

	shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(s.config.TLSConfig, newConfig.TLSConfig)
	if err != nil {
		s.logger.Error("error checking whether to reload TLS configuration", "error", err)
	}

	if shouldReloadTLS {
		if err := s.reloadTLSConnections(newConfig.TLSConfig); err != nil {
			s.logger.Error("error reloading server TLS configuration", "error", err)
			_ = multierror.Append(&mErr, err)
		}
	}

	if newConfig.LicenseConfig.LicenseEnvBytes != "" || newConfig.LicenseConfig.LicensePath != "" {
		if err = s.EnterpriseState.ReloadLicense(newConfig); err != nil {
			s.logger.Error("error reloading license", "error", err)
			_ = multierror.Append(&mErr, err)
		}
	}

	// Because this is a new configuration, we extract the worker pool arguments without acquiring a lock
	workerPoolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(newConfig)
	if reload, newVals := shouldReloadSchedulers(s, workerPoolArgs); reload {
		if newVals.IsValid() {
			reloadSchedulers(s, newVals)
		}
	}

	raftRC := raft.ReloadableConfig{
		TrailingLogs:      newConfig.RaftConfig.TrailingLogs,
		SnapshotInterval:  newConfig.RaftConfig.SnapshotInterval,
		SnapshotThreshold: newConfig.RaftConfig.SnapshotThreshold,
		HeartbeatTimeout:  newConfig.RaftConfig.HeartbeatTimeout,
		ElectionTimeout:   newConfig.RaftConfig.ElectionTimeout,
	}
	if err := s.raft.ReloadConfig(raftRC); err != nil {
		_ = multierror.Append(&mErr, err)
	}

	return mErr.ErrorOrNil()
}

// setupBootstrapHandler creates the closure necessary to support a Consul
// fallback handler.
func (s *Server) setupBootstrapHandler() error {
	// peersTimeout is used to indicate to the Consul Syncer that the
	// current Nomad Server has a stale peer set. peersTimeout will time
	// out if the Consul Syncer bootstrapFn has not observed a Raft
	// leader in maxStaleLeadership. If peersTimeout has been triggered,
	// the Consul Syncer will begin querying Consul for other Nomad
	// Servers.
	//
	// NOTE: time.Timer is used vs time.Time in order to handle clock
	// drift because time.Timer is implemented as a monotonic clock.
	var peersTimeout *time.Timer = time.NewTimer(0)

	// consulQueryCount is the number of times the bootstrapFn has been
	// called, regardless of success.
	var consulQueryCount uint64

	// leadershipTimedOut is a helper method that returns true if the
	// peersTimeout timer has expired.
	leadershipTimedOut := func() bool {
		select {
		case <-peersTimeout.C:
			return true
		default:
			return false
		}
	}

	// The bootstrapFn callback handler is used to periodically poll
	// Consul to look up the Nomad Servers in Consul. In the event the
	// server has been brought up without a `retry-join` configuration
	// and this Server is partitioned from the rest of the cluster,
	// periodically poll Consul to reattach this Server to other servers
	// in the same region and automatically reform a quorum (assuming the
	// correct number of servers required for quorum are present).
	bootstrapFn := func() error {
		// If there is a raft leader, do nothing
		if s.raft.Leader() != "" {
			peersTimeout.Reset(maxStaleLeadership)
			return nil
		}

		// (ab)use serf.go's behavior of setting BootstrapExpect to
		// zero if we have bootstrapped.
		bootstrapExpect := s.config.BootstrapExpect
		if bootstrapExpect == 0 {
			// This Nomad Server has been bootstrapped. Rely on
			// the peersTimeout firing as a guard to prevent
			// aggressive querying of Consul.
			if !leadershipTimedOut() {
				return nil
			}
		} else {
			if consulQueryCount > 0 && !leadershipTimedOut() {
				return nil
			}

			// This Nomad Server has not been bootstrapped, reach
			// out to Consul if our peer list is less than
			// `bootstrap_expect`.
			raftPeers, err := s.numPeers()
			if err != nil {
				peersTimeout.Reset(peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}

			// The necessary number of Nomad Servers required for
			// quorum has been reached, we do not need to poll
			// Consul. Let the normal timeout-based strategy
			// take over.
			if raftPeers >= bootstrapExpect {
				peersTimeout.Reset(peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}
		}
		consulQueryCount++

		s.logger.Debug("lost contact with Nomad quorum, falling back to Consul for server list")

		dcs, err := s.consulCatalog.Datacenters()
		if err != nil {
			peersTimeout.Reset(peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("server.nomad: unable to query Consul datacenters: %v", err)
		}
		if len(dcs) > 2 {
			// Query the local DC first, then shuffle the
			// remaining DCs. If additional calls to bootstrapFn
			// are necessary, this Nomad Server will eventually
			// walk all datacenters until it finds enough hosts to
			// form a quorum.
			shuffleStrings(dcs[1:])
			dcs = dcs[0:helper.Min(len(dcs), datacenterQueryLimit)]
		}

		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
		var mErr multierror.Error
		const defaultMaxNumNomadServers = 8
		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
		localNode := s.serf.Memberlist().LocalNode()
		for _, dc := range dcs {
			consulOpts := &consulapi.QueryOptions{
				AllowStale: true,
				Datacenter: dc,
				Near:       "_agent",
				WaitTime:   consul.DefaultQueryWaitDuration,
			}
			consulServices, _, err := s.consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
			if err != nil {
				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
				s.logger.Warn("failed to query Nomad service in Consul datacenter", "service_name", nomadServerServiceName, "dc", dc, "error", err)
				mErr.Errors = append(mErr.Errors, err)
				continue
			}

			for _, cs := range consulServices {
				port := strconv.FormatInt(int64(cs.ServicePort), 10)
				addr := cs.ServiceAddress
				if addr == "" {
					addr = cs.Address
				}
				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
					continue
				}
				serverAddr := net.JoinHostPort(addr, port)
				nomadServerServices = append(nomadServerServices, serverAddr)
			}
		}

		if len(nomadServerServices) == 0 {
			if len(mErr.Errors) > 0 {
				peersTimeout.Reset(peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return mErr.ErrorOrNil()
			}

			// Log the error and return nil so future handlers
			// can attempt to register the `nomad` service.
			pollInterval := peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor)
			s.logger.Trace("no Nomad Servers advertising Nomad service in Consul datacenters", "service_name", nomadServerServiceName, "datacenters", dcs, "retry", pollInterval)
			peersTimeout.Reset(pollInterval)
			return nil
		}

		numServersContacted, err := s.Join(nomadServerServices)
		if err != nil {
			peersTimeout.Reset(peersPollInterval + helper.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
		}

		peersTimeout.Reset(maxStaleLeadership)
		s.logger.Info("successfully contacted Nomad servers", "num_servers", numServersContacted)

		return nil
	}

	// Hacky replacement for old ConsulSyncer Periodic Handler.
	go func() {
		lastOk := true
		sync := time.NewTimer(0)
		for {
			select {
			case <-sync.C:
				d := defaultConsulDiscoveryInterval
				if err := bootstrapFn(); err != nil {
					// Only log if it worked last time
					if lastOk {
						lastOk = false
						s.logger.Error("error looking up Nomad servers in Consul", "error", err)
					}
					d = defaultConsulDiscoveryIntervalRetry
				}
				sync.Reset(d)
			case <-s.shutdownCh:
				return
			}
		}
	}()

	return nil
}
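
// Illustrative walk-through (editorial, not part of the upstream source): with
// bootstrap_expect = 3 and only this server present, bootstrapFn sees no Raft
// leader, queries each candidate Consul datacenter for the configured server
// service tagged consul.ServiceTagSerf, collects every advertised address
// except its own, and attempts a Serf join via s.Join. Once a Raft leader is
// observed it only resets peersTimeout to maxStaleLeadership and does not
// query Consul.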

// setupConsulSyncer creates Server-mode consul.Syncer which periodically
// executes callbacks on a fixed interval.
func (s *Server) setupConsulSyncer() error {
	if s.config.ConsulConfig.ServerAutoJoin != nil && *s.config.ConsulConfig.ServerAutoJoin {
		if err := s.setupBootstrapHandler(); err != nil {
			return err
		}
	}

	return nil
}

// setupDeploymentWatcher creates a deployment watcher that consumes the RPC
// endpoints for state information and makes transitions via Raft through a
// shim that provides the appropriate methods.
func (s *Server) setupDeploymentWatcher() error {
	// Create the raft shim type to restrict the set of raft methods that can be
	// made
	raftShim := &deploymentWatcherRaftShim{
		apply: s.raftApply,
	}

	// Create the deployment watcher
	s.deploymentWatcher = deploymentwatcher.NewDeploymentsWatcher(
		s.logger,
		raftShim,
		NewDeploymentEndpoint(s, nil),
		NewJobEndpoints(s, nil),
		s.config.DeploymentQueryRateLimit,
		deploymentwatcher.CrossDeploymentUpdateBatchDuration,
	)

	return nil
}

// setupVolumeWatcher creates a volume watcher that sends CSI RPCs
func (s *Server) setupVolumeWatcher() error {
	s.volumeWatcher = volumewatcher.NewVolumesWatcher(
		s.logger, NewCSIVolumeEndpoint(s, nil), s.getLeaderAcl())
	return nil
}

// setupNodeDrainer creates a node drainer which will be enabled when a server
// becomes a leader.
func (s *Server) setupNodeDrainer() {
	// Create a shim around Raft requests
	shim := drainerShim{s}
	c := &drainer.NodeDrainerConfig{
		Logger:                s.logger,
		Raft:                  shim,
		JobFactory:            drainer.GetDrainingJobWatcher,
		NodeFactory:           drainer.GetNodeWatcherFactory(),
		DrainDeadlineFactory:  drainer.GetDeadlineNotifier,
		StateQueriesPerSecond: drainer.LimitStateQueriesPerSecond,
		BatchUpdateInterval:   drainer.BatchUpdateInterval,
	}
	s.nodeDrainer = drainer.NewNodeDrainer(c)
}

// setupConsul is used to setup Server specific consul components.
func (s *Server) setupConsul(consulConfigEntries consul.ConfigAPI, consulACLs consul.ACLsAPI) {
	s.consulConfigEntries = NewConsulConfigsAPI(consulConfigEntries, s.logger)
	s.consulACLs = NewConsulACLsAPI(consulACLs, s.logger, s.purgeSITokenAccessors)
}

// setupVaultClient is used to set up the Vault API client.
func (s *Server) setupVaultClient() error {
	delegate := s.entVaultDelegate()
	v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors, delegate)
	if err != nil {
		return err
	}
	s.vault = v
	return nil
}

// setupRPC is used to setup the RPC listener
func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
	// Populate the static RPC server
	s.setupRpcServer(s.rpcServer, nil)

	// Setup streaming endpoints
	s.setupStreamingEndpoints(s.rpcServer)

	listener, err := s.createRPCListener()
	if err != nil {
		listener.Close()
		return err
	}

	if s.config.ClientRPCAdvertise != nil {
		s.clientRpcAdvertise = s.config.ClientRPCAdvertise
	} else {
		s.clientRpcAdvertise = s.rpcListener.Addr()
	}

	// Verify that we have a usable advertise address
	clientAddr, ok := s.clientRpcAdvertise.(*net.TCPAddr)
	if !ok {
		listener.Close()
		return fmt.Errorf("Client RPC advertise address is not a TCP Address: %v", clientAddr)
	}
	if clientAddr.IP.IsUnspecified() {
		listener.Close()
		return fmt.Errorf("Client RPC advertise address is not advertisable: %v", clientAddr)
	}

	if s.config.ServerRPCAdvertise != nil {
		s.serverRpcAdvertise = s.config.ServerRPCAdvertise
	} else {
		// Default to the Serf Advertise + RPC Port
		serfIP := s.config.SerfConfig.MemberlistConfig.AdvertiseAddr
		if serfIP == "" {
			serfIP = s.config.SerfConfig.MemberlistConfig.BindAddr
		}

		addr := net.JoinHostPort(serfIP, fmt.Sprintf("%d", clientAddr.Port))
		resolved, err := net.ResolveTCPAddr("tcp", addr)
		if err != nil {
			return fmt.Errorf("Failed to resolve Server RPC advertise address: %v", err)
		}

		s.serverRpcAdvertise = resolved
	}

	// Verify that we have a usable advertise address
	serverAddr, ok := s.serverRpcAdvertise.(*net.TCPAddr)
	if !ok {
		return fmt.Errorf("Server RPC advertise address is not a TCP Address: %v", serverAddr)
	}
	if serverAddr.IP.IsUnspecified() {
		listener.Close()
		return fmt.Errorf("Server RPC advertise address is not advertisable: %v", serverAddr)
	}

	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
	s.raftLayer = NewRaftLayer(s.serverRpcAdvertise, wrapper)
	return nil
}
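
// Illustrative example (editorial, not part of the upstream source): when
// ServerRPCAdvertise is unset, the Serf memberlist advertise (or bind) address
// is joined with the client RPC port, so a Serf AdvertiseAddr of "10.0.0.5"
// and an RPC listener on the default port 4647 yield a serverRpcAdvertise of
// "10.0.0.5:4647".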

// setupStreamingEndpoints is used to populate an RPC server with streaming
// endpoints. This only gets called at server startup.
func (s *Server) setupStreamingEndpoints(server *rpc.Server) {
	// The endpoints are client RPCs and don't include a connection
	// context. They also need to be registered as streaming endpoints in their
	// register() methods.
	clientAllocs := NewClientAllocationsEndpoint(s)
	clientAllocs.register()

	fsEndpoint := NewFileSystemEndpoint(s)
	fsEndpoint.register()

	agentEndpoint := NewAgentEndpoint(s)
	agentEndpoint.register()

	// Event is a streaming-only endpoint so we don't want to register it as a
	// normal RPC
	eventEndpoint := NewEventEndpoint(s)
	eventEndpoint.register()

	// Operator takes a RPC context but also has a streaming RPC that needs to
	// be registered
	operatorEndpoint := NewOperatorEndpoint(s, nil)
	operatorEndpoint.register()
}

// setupRpcServer is used to populate an RPC server with endpoints. This gets
// called at startup but also once for every new RPC connection so that RPC
// handlers can have per-connection context.
func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
	// These endpoints are client RPCs and don't include a connection context
	_ = server.Register(NewClientStatsEndpoint(s))
	_ = server.Register(newNodeMetaEndpoint(s))

	// These endpoints have their streaming component registered in
	// setupStreamingEndpoints, but their non-streaming RPCs are registered
	// here.
	_ = server.Register(NewClientAllocationsEndpoint(s))
	_ = server.Register(NewFileSystemEndpoint(s))
	_ = server.Register(NewAgentEndpoint(s))
	_ = server.Register(NewOperatorEndpoint(s, ctx))

	// All other endpoints include the connection context and don't need to be
	// registered as streaming endpoints
	_ = server.Register(NewACLEndpoint(s, ctx))
	_ = server.Register(NewAllocEndpoint(s, ctx))
	_ = server.Register(NewClientCSIEndpoint(s, ctx))
	_ = server.Register(NewCSIVolumeEndpoint(s, ctx))
	_ = server.Register(NewCSIPluginEndpoint(s, ctx))
	_ = server.Register(NewDeploymentEndpoint(s, ctx))
	_ = server.Register(NewEvalEndpoint(s, ctx))
	_ = server.Register(NewJobEndpoints(s, ctx))
	_ = server.Register(NewKeyringEndpoint(s, ctx, s.encrypter))
	_ = server.Register(NewNamespaceEndpoint(s, ctx))
	_ = server.Register(NewNodeEndpoint(s, ctx))
	_ = server.Register(NewNodePoolEndpoint(s, ctx))
	_ = server.Register(NewPeriodicEndpoint(s, ctx))
	_ = server.Register(NewPlanEndpoint(s, ctx))
	_ = server.Register(NewRegionEndpoint(s, ctx))
	_ = server.Register(NewScalingEndpoint(s, ctx))
	_ = server.Register(NewSearchEndpoint(s, ctx))
	_ = server.Register(NewServiceRegistrationEndpoint(s, ctx))
	_ = server.Register(NewStatusEndpoint(s, ctx))
	_ = server.Register(NewSystemEndpoint(s, ctx))
	_ = server.Register(NewVariablesEndpoint(s, ctx, s.encrypter))

	// Register non-streaming
	ent := NewEnterpriseEndpoints(s, ctx)
	ent.Register(server)
}

// setupRaft is used to set up and initialize Raft
func (s *Server) setupRaft() error {
	// If we have an unclean exit then attempt to close the Raft store.
	defer func() {
		if s.raft == nil && s.raftStore != nil {
			if err := s.raftStore.Close(); err != nil {
				s.logger.Error("failed to close Raft store", "error", err)
			}
		}
	}()

	// Create the FSM
	fsmConfig := &FSMConfig{
		EvalBroker:        s.evalBroker,
		Periodic:          s.periodicDispatcher,
		Blocked:           s.blockedEvals,
		Logger:            s.logger,
		Region:            s.Region(),
		EnableEventBroker: s.config.EnableEventBroker,
		EventBufferSize:   s.config.EventBufferSize,
	}

	var err error
	s.fsm, err = NewFSM(fsmConfig)
	if err != nil {
		return err
	}

	// Create a transport layer
	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
		s.config.LogOutput)
	s.raftTransport = trans

	// Make sure we set the Logger.
	s.config.RaftConfig.Logger = s.logger.Named("raft")
	s.config.RaftConfig.LogOutput = nil

	// Our version of Raft protocol 2 requires the LocalID to match the network
	// address of the transport. Raft protocol 3 uses permanent IDs.
	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
	if s.config.RaftConfig.ProtocolVersion >= 3 {
		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
	}

	// Build an all in-memory setup for dev mode, otherwise prepare a full
	// disk-based setup.
	var log raft.LogStore
	var stable raft.StableStore
	var snap raft.SnapshotStore
	if s.config.DevMode {
		store := raft.NewInmemStore()
		s.raftInmem = store
		stable = store
		log = store
		snap = raft.NewDiscardSnapshotStore()
	} else {
		// Create the base raft path
		path := filepath.Join(s.config.DataDir, raftState)
		if err := ensurePath(path, true); err != nil {
			return err
		}

		// Check Raft version and update the version file.
		raftVersionFilePath := filepath.Join(path, "version")
		raftVersionFileContent := strconv.Itoa(int(s.config.RaftConfig.ProtocolVersion))
		if err := s.checkRaftVersionFile(raftVersionFilePath); err != nil {
			return err
		}
		if err := os.WriteFile(raftVersionFilePath, []byte(raftVersionFileContent), 0644); err != nil {
			return fmt.Errorf("failed to write Raft version file: %v", err)
		}

		// Create the BoltDB backend, with NoFreelistSync option
		store, raftErr := raftboltdb.New(raftboltdb.Options{
			Path:   filepath.Join(path, "raft.db"),
			NoSync: false, // fsync each log write
			BoltOptions: &bbolt.Options{
				NoFreelistSync: s.config.RaftBoltNoFreelistSync,
			},
		})
		if raftErr != nil {
			return raftErr
		}
		s.raftStore = store
		stable = store
		s.logger.Info("setting up raft bolt store", "no_freelist_sync", s.config.RaftBoltNoFreelistSync)

		// Start publishing bboltdb metrics
		go store.RunMetrics(s.shutdownCtx, 0)

		// Wrap the store in a LogCache to improve performance
		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
		if err != nil {
			store.Close()
			return err
		}
		log = cacheStore

		// Create the snapshot store
		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
		if err != nil {
			if s.raftStore != nil {
				s.raftStore.Close()
			}
			return err
		}
		snap = snapshots

		// For an existing cluster being upgraded to the new version of
		// Raft, we almost never want to run recovery based on the old
		// peers.json file. We create a peers.info file with a helpful
		// note about where peers.json went, and use that as a sentinel
		// to avoid ingesting the old one that first time (if we have to
		// create the peers.info file because it's not there, we also
		// blow away any existing peers.json file).
		peersFile := filepath.Join(path, "peers.json")
		peersInfoFile := filepath.Join(path, "peers.info")
		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
			if err := os.WriteFile(peersInfoFile, []byte(peersInfoContent), 0644); err != nil {
				return fmt.Errorf("failed to write peers.info file: %v", err)
			}

			// Blow away the peers.json file if present, since the
			// peers.info sentinel wasn't there.
			if _, err := os.Stat(peersFile); err == nil {
				if err := os.Remove(peersFile); err != nil {
					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
				}
				s.logger.Info("deleted peers.json file (see peers.info for details)")
			}
		} else if _, err := os.Stat(peersFile); err == nil {
			s.logger.Info("found peers.json file, recovering Raft configuration...")

			var configuration raft.Configuration
			if s.config.RaftConfig.ProtocolVersion < 3 {
				configuration, err = raft.ReadPeersJSON(peersFile)
			} else {
				configuration, err = raft.ReadConfigJSON(peersFile)
			}
			if err != nil {
				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
			}

			tmpFsm, err := NewFSM(fsmConfig)
			if err != nil {
				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
			}
			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
				log, stable, snap, trans, configuration); err != nil {
				return fmt.Errorf("recovery failed: %v", err)
			}
			if err := os.Remove(peersFile); err != nil {
				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
			}
			s.logger.Info("deleted peers.json file after successful recovery")
		}
	}

	// If we are a single server cluster and the state is clean then we can
	// bootstrap now.
	if s.isSingleServerCluster() {
		hasState, err := raft.HasExistingState(log, stable, snap)
		if err != nil {
			return err
		}
		if !hasState {
			configuration := raft.Configuration{
				Servers: []raft.Server{
					{
						ID:      s.config.RaftConfig.LocalID,
						Address: trans.LocalAddr(),
					},
				},
			}
			if err := raft.BootstrapCluster(s.config.RaftConfig,
				log, stable, snap, trans, configuration); err != nil {
				return err
			}
		}
	}

	// Setup the Raft store
	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
	if err != nil {
		return err
	}
	return nil
}
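
// As a rough illustration of the branch above: a dev-mode server keeps the
// entire Raft state in memory, while a normal server persists it under
// <data_dir>/raft (raft.db, snapshots, version, peers.info). A hypothetical
// configuration for each mode might be:
//
//	devCfg := DefaultConfig()
//	devCfg.DevMode = true // in-memory log/stable store, snapshots discarded
//
//	prodCfg := DefaultConfig()
//	prodCfg.DataDir = "/var/lib/nomad" // Raft data lands in /var/lib/nomad/raft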

// checkRaftVersionFile reads the Raft version file and returns an error if
// the Raft version is incompatible with the currently configured version.
// The check is best-effort: if the file cannot be read or parsed, a warning
// is logged and no error is returned.
func (s *Server) checkRaftVersionFile(path string) error {
	raftVersion := s.config.RaftConfig.ProtocolVersion
	baseWarning := "use the 'nomad operator raft list-peers' command to make sure the Raft protocol versions are consistent"

	_, err := os.Stat(path)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		s.logger.Warn(fmt.Sprintf("unable to read Raft version file, %s", baseWarning), "error", err)
		return nil
	}

	v, err := os.ReadFile(path)
	if err != nil {
		s.logger.Warn(fmt.Sprintf("unable to read Raft version file, %s", baseWarning), "error", err)
		return nil
	}

	previousVersion, err := strconv.Atoi(strings.TrimSpace(string(v)))
	if err != nil {
		s.logger.Warn(fmt.Sprintf("invalid Raft protocol version in Raft version file, %s", baseWarning), "error", err)
		return nil
	}

	if raft.ProtocolVersion(previousVersion) > raftVersion {
		return fmt.Errorf("downgrading Raft is not supported, current version is %d, previous version was %d", raftVersion, previousVersion)
	}

	return nil
}
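
// The version file written by setupRaft contains only the integer protocol
// version (e.g. "3"). A minimal standalone sketch mirroring the downgrade
// check above, where raw and cfg are hypothetical placeholders for the file
// contents and the server config:
//
//	prev, err := strconv.Atoi(strings.TrimSpace(string(raw)))
//	if err == nil && raft.ProtocolVersion(prev) > cfg.RaftConfig.ProtocolVersion {
//		// refuse to start: Raft protocol downgrades are unsupported
//	}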

// setupSerf is used to set up and initialize a Serf instance
func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
	conf.Init()
	conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
	conf.Tags["role"] = "nomad"
	conf.Tags["region"] = s.config.Region
	conf.Tags["dc"] = s.config.Datacenter
	conf.Tags["build"] = s.config.Build
	conf.Tags["revision"] = s.config.Revision
	conf.Tags["vsn"] = deprecatedAPIMajorVersionStr // for Nomad <= v1.2 compat
	conf.Tags["raft_vsn"] = fmt.Sprintf("%d", s.config.RaftConfig.ProtocolVersion)
	conf.Tags["id"] = s.config.NodeID
	conf.Tags["rpc_addr"] = s.clientRpcAdvertise.(*net.TCPAddr).IP.String()         // Address that clients will use to RPC to servers
	conf.Tags["port"] = fmt.Sprintf("%d", s.serverRpcAdvertise.(*net.TCPAddr).Port) // Port servers use to RPC to one another

	if s.isSingleServerCluster() {
		conf.Tags["bootstrap"] = "1"
	}
	bootstrapExpect := s.config.BootstrapExpect
	if bootstrapExpect != 0 {
		conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
	}
	if s.config.NonVoter {
		conf.Tags["nonvoter"] = "1"
	}
	if s.config.RedundancyZone != "" {
		conf.Tags[AutopilotRZTag] = s.config.RedundancyZone
	}
	if s.config.UpgradeVersion != "" {
		conf.Tags[AutopilotVersionTag] = s.config.UpgradeVersion
	}

	logger := s.logger.StandardLoggerIntercept(&log.StandardLoggerOptions{InferLevels: true})
	conf.MemberlistConfig.Logger = logger
	conf.Logger = logger
	conf.MemberlistConfig.LogOutput = nil
	conf.LogOutput = nil

	conf.EventCh = ch
	if !s.config.DevMode {
		conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
		if err := ensurePath(conf.SnapshotPath, false); err != nil {
			return nil, err
		}
	}

	// LeavePropagateDelay is used to make sure broadcasted leave intents propagate
	// This value was tuned using https://www.serf.io/docs/internals/simulator.html to
	// allow for convergence in 99.9% of nodes in a 10 node cluster
	conf.LeavePropagateDelay = 1 * time.Second
	conf.Merge = &serfMergeDelegate{}

	// Until Nomad supports this fully, we disable automatic resolution.
	// When enabled, the Serf gossip may just turn off if we are the minority
	// node which is rather unexpected.
	conf.EnableNameConflictResolution = false
	return serf.Create(conf)
}
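
// For a three-server region with bootstrap_expect = 3, the gossip tags built
// above would end up looking roughly like this (all values illustrative):
//
//	conf.Tags = map[string]string{
//		"role":     "nomad",
//		"region":   "global",
//		"dc":       "dc1",
//		"build":    "1.6.0",
//		"vsn":      "1",
//		"raft_vsn": "3",
//		"id":       "<node-id>",
//		"rpc_addr": "10.0.0.1",
//		"port":     "4647",
//		"expect":   "3",
//	}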

// shouldReloadSchedulers checks the new config to determine if the scheduler worker pool
// needs to be updated. If so, returns true and a pointer to a populated SchedulerWorkerPoolArgs
func shouldReloadSchedulers(s *Server, newPoolArgs *SchedulerWorkerPoolArgs) (bool, *SchedulerWorkerPoolArgs) {
	s.workerConfigLock.RLock()
	defer s.workerConfigLock.RUnlock()

	newSchedulers := make([]string, len(newPoolArgs.EnabledSchedulers))
	copy(newSchedulers, newPoolArgs.EnabledSchedulers)
	sort.Strings(newSchedulers)

	if s.config.NumSchedulers != newPoolArgs.NumSchedulers {
		return true, newPoolArgs
	}

	oldSchedulers := make([]string, len(s.config.EnabledSchedulers))
	copy(oldSchedulers, s.config.EnabledSchedulers)
	sort.Strings(oldSchedulers)

	for i, v := range newSchedulers {
		if oldSchedulers[i] != v {
			return true, newPoolArgs
		}
	}

	return false, nil
}

// SchedulerWorkerPoolArgs are the two key configuration options for a Nomad server's
// scheduler worker pool. Before using, you should always verify that they are valid
// using IsValid() or IsInvalid().
type SchedulerWorkerPoolArgs struct {
	NumSchedulers     int
	EnabledSchedulers []string
}

// IsInvalid returns true when the SchedulerWorkerPoolArgs.IsValid is false
func (swpa SchedulerWorkerPoolArgs) IsInvalid() bool {
	return !swpa.IsValid()
}

// IsValid verifies that the pool arguments are valid. That is, they have a non-negative
// NumSchedulers value and the EnabledSchedulers list contains _core and only refers to
// known schedulers.
func (swpa SchedulerWorkerPoolArgs) IsValid() bool {
	if swpa.NumSchedulers < 0 {
		// the pool has to be non-negative
		return false
	}

	// validate the scheduler list against the builtin types and _core
	foundCore := false
	for _, sched := range swpa.EnabledSchedulers {
		if sched == structs.JobTypeCore {
			foundCore = true
			continue // core is not in the BuiltinSchedulers map, so we need to skip that check
		}

		if _, ok := scheduler.BuiltinSchedulers[sched]; !ok {
			return false // found an unknown scheduler in the list; bailing out
		}
	}

	return foundCore
}
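
// A quick sketch of how these validation helpers behave (the non-core
// scheduler names are illustrative and assumed to be builtin scheduler types):
//
//	ok := SchedulerWorkerPoolArgs{
//		NumSchedulers:     4,
//		EnabledSchedulers: []string{structs.JobTypeCore, "service", "batch"},
//	}.IsValid() // true
//
//	bad := SchedulerWorkerPoolArgs{
//		NumSchedulers:     4,
//		EnabledSchedulers: []string{"service"}, // missing _core
//	}.IsInvalid() // true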

// Copy returns a clone of a SchedulerWorkerPoolArgs struct. Concurrent access
// concerns should be managed by the caller.
func (swpa SchedulerWorkerPoolArgs) Copy() SchedulerWorkerPoolArgs {
	out := SchedulerWorkerPoolArgs{
		NumSchedulers:     swpa.NumSchedulers,
		EnabledSchedulers: make([]string, len(swpa.EnabledSchedulers)),
	}
	copy(out.EnabledSchedulers, swpa.EnabledSchedulers)

	return out
}

func getSchedulerWorkerPoolArgsFromConfigLocked(c *Config) *SchedulerWorkerPoolArgs {
	return &SchedulerWorkerPoolArgs{
		NumSchedulers:     c.NumSchedulers,
		EnabledSchedulers: c.EnabledSchedulers,
	}
}

// GetSchedulerWorkersInfo returns a slice of WorkerInfos from all of
// the running scheduler workers.
func (s *Server) GetSchedulerWorkersInfo() []WorkerInfo {
	s.workerLock.RLock()
	defer s.workerLock.RUnlock()

	out := make([]WorkerInfo, len(s.workers))
	for i := 0; i < len(s.workers); i = i + 1 {
		workerInfo := s.workers[i].Info()
		out[i] = workerInfo.Copy()
	}
	return out
}

// GetSchedulerWorkerConfig returns a clean copy of the server's current scheduler
// worker config.
func (s *Server) GetSchedulerWorkerConfig() SchedulerWorkerPoolArgs {
	s.workerConfigLock.RLock()
	defer s.workerConfigLock.RUnlock()

	return getSchedulerWorkerPoolArgsFromConfigLocked(s.config).Copy()
}

// SetSchedulerWorkerConfig applies the given scheduler worker pool arguments
// if they are valid and differ from the current configuration, then returns
// the configuration the server is actually running with.
func (s *Server) SetSchedulerWorkerConfig(newArgs SchedulerWorkerPoolArgs) SchedulerWorkerPoolArgs {
	if reload, newVals := shouldReloadSchedulers(s, &newArgs); reload {
		if newVals.IsValid() {
			reloadSchedulers(s, newVals)
		}
	}
	return s.GetSchedulerWorkerConfig()
}
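
// A hypothetical reload path (for example, an agent configuration reload)
// could apply a new worker pool configuration like this; the returned value
// reflects whatever the server actually ended up with:
//
//	applied := srv.SetSchedulerWorkerConfig(SchedulerWorkerPoolArgs{
//		NumSchedulers:     2,
//		EnabledSchedulers: []string{structs.JobTypeCore, "batch"},
//	})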

// reloadSchedulers validates the passed scheduler worker pool arguments, locks the
// workerLock, applies the new values to the s.config, and restarts the pool
func reloadSchedulers(s *Server, newArgs *SchedulerWorkerPoolArgs) {
	if newArgs == nil || newArgs.IsInvalid() {
		s.logger.Info("received invalid arguments for scheduler pool reload; ignoring")
		return
	}

	// reload will modify the server.config so it needs a write lock
	s.workerConfigLock.Lock()
	defer s.workerConfigLock.Unlock()

	// reload modifies the worker slice so it needs a write lock
	s.workerLock.Lock()
	defer s.workerLock.Unlock()

	// TODO: If EnabledSchedulers didn't change, we can scale rather than drain and rebuild
	s.config.NumSchedulers = newArgs.NumSchedulers
	s.config.EnabledSchedulers = newArgs.EnabledSchedulers
	s.setupNewWorkersLocked()
}

// setupWorkers is used to start the scheduling workers
func (s *Server) setupWorkers(ctx context.Context) error {
	poolArgs := s.GetSchedulerWorkerConfig()

	go s.listenWorkerEvents()

	// we will be writing to the worker slice
	s.workerLock.Lock()
	defer s.workerLock.Unlock()

	return s.setupWorkersLocked(ctx, poolArgs)
}

// setupWorkersLocked directly manipulates the server.config, so it is not safe to
// call concurrently. Use setupWorkers() or call this while holding server.workerLock.
func (s *Server) setupWorkersLocked(ctx context.Context, poolArgs SchedulerWorkerPoolArgs) error {
	// Check if all the schedulers are disabled
	if len(poolArgs.EnabledSchedulers) == 0 || poolArgs.NumSchedulers == 0 {
		s.logger.Warn("no enabled schedulers")
		return nil
	}

	// Check that the core scheduler is enabled
	foundCore := false
	for _, sched := range poolArgs.EnabledSchedulers {
		if sched == structs.JobTypeCore {
			foundCore = true
			continue
		}

		if _, ok := scheduler.BuiltinSchedulers[sched]; !ok {
			return fmt.Errorf("invalid configuration: unknown scheduler %q in enabled schedulers", sched)
		}
	}
	if !foundCore {
		return fmt.Errorf("invalid configuration: %q scheduler not enabled", structs.JobTypeCore)
	}

	s.logger.Info("starting scheduling worker(s)", "num_workers", poolArgs.NumSchedulers, "schedulers", poolArgs.EnabledSchedulers)

	// Start the workers
	for i := 0; i < s.config.NumSchedulers; i++ {
		if w, err := NewWorker(ctx, s, poolArgs); err != nil {
			return err
		} else {
			s.logger.Debug("started scheduling worker", "id", w.ID(), "index", i+1, "of", s.config.NumSchedulers)
			s.workers = append(s.workers, w)
		}
	}

	s.logger.Info("started scheduling worker(s)", "num_workers", s.config.NumSchedulers, "schedulers", s.config.EnabledSchedulers)
	return nil
}

// setupNewWorkersLocked directly manipulates the server.config, so it is not safe to
// call concurrently. Use reloadSchedulers() or call this while holding server.workerLock.
func (s *Server) setupNewWorkersLocked() error {
	// make a copy of the s.workers array so we can safely stop those goroutines asynchronously
	oldWorkers := make([]*Worker, len(s.workers))
	defer s.stopOldWorkers(oldWorkers)
	copy(oldWorkers, s.workers)
	s.logger.Info(fmt.Sprintf("marking %v current schedulers for shutdown", len(oldWorkers)))

	// build a clean backing array and call setupWorkersLocked like setupWorkers
	// does in the normal startup path
	s.workers = make([]*Worker, 0, s.config.NumSchedulers)
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s.config).Copy()
	err := s.setupWorkersLocked(s.shutdownCtx, poolArgs)
	if err != nil {
		return err
	}

	// if we're the leader, we need to pause all of the pausable workers.
	s.handlePausableWorkers(s.IsLeader())

	return nil
}

// stopOldWorkers is called once setupNewWorkersLocked has created the new worker
// array, to asynchronously stop each of the old workers individually.
func (s *Server) stopOldWorkers(oldWorkers []*Worker) {
	workerCount := len(oldWorkers)
	for i, w := range oldWorkers {
		s.logger.Debug("stopping old scheduling worker", "id", w.ID(), "index", i+1, "of", workerCount)
		go w.Stop()
	}
}

// listenWorkerEvents listens for events emitted by scheduler workers and logs
// them if necessary. Some events may be skipped to avoid polluting logs with
// duplicates.
func (s *Server) listenWorkerEvents() {
	loggedAt := make(map[string]time.Time)

	gcDeadline := 4 * time.Hour
	gcTicker := time.NewTicker(10 * time.Second)
	defer gcTicker.Stop()

	for {
		select {
		case <-gcTicker.C:
			for k, v := range loggedAt {
				if time.Since(v) >= gcDeadline {
					delete(loggedAt, k)
				}
			}
		case e := <-s.workersEventCh:
			switch event := e.(type) {
			case *scheduler.PortCollisionEvent:
				if event == nil || event.Node == nil {
					continue
				}

				if _, ok := loggedAt[event.Node.ID]; ok {
					continue
				}

				eventJson, err := json.Marshal(event.Sanitize())
				if err != nil {
					s.logger.Debug("failed to encode event to JSON", "error", err)
				}
				s.logger.Warn("unexpected node port collision, refer to https://www.nomadproject.io/s/port-plan-failure for more information",
					"node_id", event.Node.ID, "reason", event.Reason, "event", string(eventJson))

				loggedAt[event.Node.ID] = time.Now()
			}
		case <-s.shutdownCh:
			return
		}
	}
}

// numPeers is used to check on the number of known peers, including the local
// node.
func (s *Server) numPeers() (int, error) {
	future := s.raft.GetConfiguration()
	if err := future.Error(); err != nil {
		return 0, err
	}

	configuration := future.Configuration()
	return len(configuration.Servers), nil
}

// IsLeader checks if this server is the cluster leader
func (s *Server) IsLeader() bool {
	return s.raft.State() == raft.Leader
}

// Join is used to have Nomad join the gossip ring
// The target address should be another node listening on the
// Serf address
func (s *Server) Join(addrs []string) (int, error) {
	return s.serf.Join(addrs, true)
}
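
// A minimal usage sketch (addresses are illustrative; 4648 is Nomad's default
// Serf port):
//
//	n, err := srv.Join([]string{"10.0.0.10:4648", "10.0.0.11:4648"})
//	// n reports how many nodes were successfully contacted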

// LocalMember is used to return the local node
func (s *Server) LocalMember() serf.Member {
	return s.serf.LocalMember()
}

// Members is used to return the members of the serf cluster
func (s *Server) Members() []serf.Member {
	return s.serf.Members()
}

// RemoveFailedNode is used to remove a failed node from the cluster
func (s *Server) RemoveFailedNode(node string) error {
	return s.serf.RemoveFailedNode(node)
}

// KeyManager returns the Serf keyring manager
func (s *Server) KeyManager() *serf.KeyManager {
	return s.serf.KeyManager()
}

// Encrypted determines if gossip is encrypted
func (s *Server) Encrypted() bool {
	return s.serf.EncryptionEnabled()
}

// State returns the underlying state store. This should *not*
// be used to modify state directly.
func (s *Server) State() *state.StateStore {
	return s.fsm.State()
}

// setLeaderAcl stores the given ACL token as the current leader's ACL token.
func (s *Server) setLeaderAcl(token string) {
	s.leaderAclLock.Lock()
	s.leaderAcl = token
	s.leaderAclLock.Unlock()
}

// getLeaderAcl retrieves the leader's ACL token
func (s *Server) getLeaderAcl() string {
	s.leaderAclLock.Lock()
	defer s.leaderAclLock.Unlock()
	return s.leaderAcl
}

// Atomically sets a readiness state flag when leadership is obtained, to indicate that server is past its barrier write
func (s *Server) setConsistentReadReady() {
	s.readyForConsistentReads.Store(true)
}

// Atomically reset readiness state flag on leadership revoke
func (s *Server) resetConsistentReadReady() {
	s.readyForConsistentReads.Store(false)
}

// Returns true if this server is ready to serve consistent reads
func (s *Server) isReadyForConsistentReads() bool {
	return s.readyForConsistentReads.Load()
}

// Regions returns the known regions in the cluster.
func (s *Server) Regions() []string {
	s.peerLock.RLock()
	defer s.peerLock.RUnlock()

	regions := make([]string, 0, len(s.peers))
	for region := range s.peers {
		regions = append(regions, region)
	}
	sort.Strings(regions)
	return regions
}

// RPC is used to make a local RPC call
func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
	codec := &codec.InmemCodec{
		Method: method,
		Args:   args,
		Reply:  reply,
	}
	if err := s.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.Err
}
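
// A minimal sketch of a local call through the in-memory codec, assuming the
// Status endpoint exposes a Ping method taking a GenericRequest and an empty
// reply struct:
//
//	var out struct{}
//	err := srv.RPC("Status.Ping", structs.GenericRequest{}, &out)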

// StreamingRpcHandler is used to make a streaming RPC call.
func (s *Server) StreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	return s.streamingRpcs.GetHandler(method)
}

// Stats is used to return statistics for debugging and insight
// for various sub-systems
func (s *Server) Stats() map[string]map[string]string {
	toString := func(v uint64) string {
		return strconv.FormatUint(v, 10)
	}
	stats := map[string]map[string]string{
		"nomad": {
			"server":        "true",
			"leader":        fmt.Sprintf("%v", s.IsLeader()),
			"leader_addr":   string(s.raft.Leader()),
			"bootstrap":     fmt.Sprintf("%v", s.isSingleServerCluster()),
			"known_regions": toString(uint64(len(s.peers))),
		},
		"raft":    s.raft.Stats(),
		"serf":    s.serf.Stats(),
		"runtime": stats.RuntimeStats(),
		"vault":   s.vault.Stats(),
	}

	return stats
}

// EmitRaftStats is used to export metrics about raft indexes and state store snapshot index
func (s *Server) EmitRaftStats(period time.Duration, stopCh <-chan struct{}) {
	timer, stop := helper.NewSafeTimer(period)
	defer stop()

	for {
		timer.Reset(period)

		select {
		case <-timer.C:
			lastIndex := s.raft.LastIndex()
			metrics.SetGauge([]string{"raft", "lastIndex"}, float32(lastIndex))
			appliedIndex := s.raft.AppliedIndex()
			metrics.SetGauge([]string{"raft", "appliedIndex"}, float32(appliedIndex))
			stateStoreSnapshotIndex, err := s.State().LatestIndex()
			if err != nil {
				s.logger.Warn("Unable to read snapshot index from statestore, metric will not be emitted", "error", err)
			} else {
				metrics.SetGauge([]string{"state", "snapshotIndex"}, float32(stateStoreSnapshotIndex))
			}
		case <-stopCh:
			return
		}
	}
}
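
// This is intended to run as a long-lived goroutine; a typical invocation
// (the period here is illustrative) would be:
//
//	go srv.EmitRaftStats(10*time.Second, srv.shutdownCh)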

// setReplyQueryMeta is an RPC helper function to properly populate the query
// meta for a read response. It populates the index using a floored value
// obtained from the index table as well as leader and last contact
// information.
//
// If the passed state.StateStore is nil, a new handle is obtained.
func (s *Server) setReplyQueryMeta(stateStore *state.StateStore, table string, reply *structs.QueryMeta) error {
	// Protect against an empty stateStore object to avoid panic.
	if stateStore == nil {
		stateStore = s.fsm.State()
	}

	// Get the index from the index table and ensure the value is floored to at
	// least one.
	index, err := stateStore.Index(table)
	if err != nil {
		return err
	}
	reply.Index = helper.Max(1, index)

	// Set the query response.
	s.setQueryMeta(reply)
	return nil
}
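
// A sketch of how a read RPC handler would use this helper (the "jobs" table
// name and the response type are illustrative):
//
//	var reply structs.JobListResponse
//	if err := s.setReplyQueryMeta(nil, "jobs", &reply.QueryMeta); err != nil {
//		return err
//	}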

// Region returns the region of the server
func (s *Server) Region() string {
	return s.config.Region
}

// Datacenter returns the data center of the server
func (s *Server) Datacenter() string {
	return s.config.Datacenter
}

// GetConfig returns the config of the server for testing purposes only
func (s *Server) GetConfig() *Config {
	return s.config
}

// ReplicationToken returns the token used for replication. We use a method to support
// dynamic reloading of this value later.
func (s *Server) ReplicationToken() string {
	return s.config.ReplicationToken
}

// ClusterID returns the unique ID for this cluster.
//
// Any Nomad server agent may call this method to get at the ID.
// If we are the leader and the ID has not yet been created, it will
// be created now. Otherwise an error is returned.
//
// The ID will not be created until all participating servers have reached
// a minimum version (0.10.4).
func (s *Server) ClusterID() (string, error) {
	s.clusterIDLock.Lock()
	defer s.clusterIDLock.Unlock()

	// try to load the cluster ID from state store
	fsmState := s.fsm.State()
	existingMeta, err := fsmState.ClusterMetadata(nil)
	if err != nil {
		s.logger.Named("core").Error("failed to get cluster ID", "error", err)
		return "", err
	}

	// got the cluster ID from state store, cache that and return it
	if existingMeta != nil && existingMeta.ClusterID != "" {
		return existingMeta.ClusterID, nil
	}

	// if we are not the leader, nothing more we can do
	if !s.IsLeader() {
		return "", errors.New("cluster ID not ready yet")
	}

	// we are the leader, try to generate the ID now
	generatedID, err := s.generateClusterID()
	if err != nil {
		return "", err
	}

	return generatedID, nil
}

func (s *Server) isSingleServerCluster() bool {
	return s.config.BootstrapExpect == 1
}

// peersInfoContent is used to help operators understand what happened to the
// peers.json file. This is written to a file called peers.info in the same
// location.
const peersInfoContent = `
As of Nomad 0.5.5, the peers.json file is only used for recovery
after an outage. The format of this file depends on what the server has
configured for its Raft protocol version. Please see the server configuration
page at https://www.nomadproject.io/docs/configuration/server#raft_protocol for more
details about this parameter.

For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Nomad server in the cluster, like
this:

[
  "10.1.0.1:4647",
  "10.1.0.2:4647",
  "10.1.0.3:4647"
]

For Raft protocol version 3 and later, this should be formatted as a JSON
array containing the node ID, address:port, and suffrage information of each
Nomad server in the cluster, like this:

[
  {
    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
    "address": "10.1.0.1:4647",
    "non_voter": false
  },
  {
    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.2:4647",
    "non_voter": false
  },
  {
    "id": "97e17742-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.3:4647",
    "non_voter": false
  }
]

The "id" field is the node ID of the server. This can be found in the logs when
the server starts up, or in the "node-id" file inside the server's data
directory.

The "address" field is the address and port of the server.

The "non_voter" field controls whether the server is a non-voter, which is used
in some advanced Autopilot configurations; please see
https://www.nomadproject.io/guides/operations/outage.html for more information. If
"non_voter" is omitted it will default to false, which is typical for most
clusters.

Under normal operation, the peers.json file will not be present.

When Nomad starts for the first time, it will create this peers.info file and
delete any existing peers.json file so that recovery doesn't occur on the first
startup.

Once this peers.info file is present, any peers.json file will be ingested at
startup, and will set the Raft peer configuration manually to recover from an
outage. It's crucial that all servers in the cluster are shut down before
creating the peers.json file, and that all servers receive the same
configuration. Once the peers.json file is successfully ingested and applied, it
will be deleted.

Please see https://www.nomadproject.io/guides/outage.html for more information.
`