From 5155f2c8eb775ddb6fe6ba2a79e86ce313b25376 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Sat, 29 Aug 2015 14:14:19 -0700 Subject: [PATCH] nomad: parameterize heartbeat configuration --- nomad/config.go | 53 +++++++++++++++++++++++++++++++++------------- nomad/heartbeat.go | 26 ++++------------------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/nomad/config.go b/nomad/config.go index cfc5d5b95..767fb4e13 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -146,6 +146,25 @@ type Config struct { // process an evaluation. This is used so that an eval that will never // complete eventually fails out of the system. EvalDeliveryLimit int + + // MinHeartbeatTTL is the minimum time between heartbeats. + // This is used as a floor to prevent excessive updates. + MinHeartbeatTTL time.Duration + + // MaxHeartbeatsPerSecond is the maximum target rate of heartbeats + // being processed per second. This allows the TTL to be increased + // to meet the target rate. + MaxHeartbeatsPerSecond float64 + + // HeartbeatGrace is the additional time given as a grace period + // beyond the TTL to account for network and processing delays + // as well as clock skew. + HeartbeatGrace time.Duration + + // FailoverHeartbeatTTL is the TTL applied to heartbeats after + // a new leader is elected, since we no longer know the status + // of all the heartbeats. + FailoverHeartbeatTTL time.Duration } // CheckVersion is used to check if the ProtocolVersion is valid @@ -168,21 +187,25 @@ func DefaultConfig() *Config { } c := &Config{ - Region: DefaultRegion, - Datacenter: DefaultDC, - NodeName: hostname, - ProtocolVersion: ProtocolVersionMax, - RaftConfig: raft.DefaultConfig(), - RaftTimeout: 10 * time.Second, - RPCAddr: DefaultRPCAddr, - SerfConfig: serf.DefaultConfig(), - NumSchedulers: 1, - ServerAddress: []string{"nomad.service.consul:4647"}, - ReconcileInterval: 60 * time.Second, - EvalGCInterval: 60 * time.Second, - EvalGCThreshold: 1 * time.Hour, - EvalNackTimeout: 60 * time.Second, - EvalDeliveryLimit: 3, + Region: DefaultRegion, + Datacenter: DefaultDC, + NodeName: hostname, + ProtocolVersion: ProtocolVersionMax, + RaftConfig: raft.DefaultConfig(), + RaftTimeout: 10 * time.Second, + RPCAddr: DefaultRPCAddr, + SerfConfig: serf.DefaultConfig(), + NumSchedulers: 1, + ServerAddress: []string{"nomad.service.consul:4647"}, + ReconcileInterval: 60 * time.Second, + EvalGCInterval: 60 * time.Second, + EvalGCThreshold: 1 * time.Hour, + EvalNackTimeout: 60 * time.Second, + EvalDeliveryLimit: 3, + MinHeartbeatTTL: 10 * time.Second, + MaxHeartbeatsPerSecond: 50.0, + HeartbeatGrace: 10 * time.Second, + FailoverHeartbeatTTL: 300 * time.Second, } // Enable all known schedulers by default diff --git a/nomad/heartbeat.go b/nomad/heartbeat.go index e52c4ad21..1fdf91f4d 100644 --- a/nomad/heartbeat.go +++ b/nomad/heartbeat.go @@ -7,25 +7,6 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -const ( - // defaultHeartbeatTTL is the TTL value used for heartbeats - // when they are first initialized. This should be longer than - // the usual TTL since clients are switching to a new leader. - defaultHeartbeatTTL = 300 * time.Second - - // minHeartbeatTTL is the minimum heartbeat interval. - minHeartbeatTTL = 10 * time.Second - - // maxHeartbeatsPerSecond is the targeted maximum rate of heartbeats. - // As the cluster size grows, we simply increase the heartbeat TTL - // to approach this value. - maxHeartbeatsPerSecond = 50.0 - - // heartbeatGrace is the additional time given to the TTL period - // as a grace. This is to account for various network and processing delays. - heartbeatGrace = 10 * time.Second -) - // initializeHeartbeatTimers is used when a leader is newly elected to create // a new map to track heartbeat expiration and to reset all the timers from // the previously known set of timers. @@ -55,7 +36,7 @@ func (s *Server) initializeHeartbeatTimers() error { if node.TerminalStatus() { continue } - s.resetHeartbeatTimerLocked(node.ID, defaultHeartbeatTTL) + s.resetHeartbeatTimerLocked(node.ID, s.config.FailoverHeartbeatTTL) } return nil } @@ -68,11 +49,12 @@ func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) { // Compute the target TTL value n := len(s.heartbeatTimers) - ttl := rateScaledInterval(maxHeartbeatsPerSecond, minHeartbeatTTL, n) + ttl := rateScaledInterval(s.config.MaxHeartbeatsPerSecond, + s.config.MinHeartbeatTTL, n) ttl += randomStagger(ttl) // Reset the TTL - s.resetHeartbeatTimerLocked(id, ttl+heartbeatGrace) + s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace) return ttl, nil }