nomad: parameterize heartbeat configuration

This commit is contained in:
Armon Dadgar 2015-08-29 14:14:19 -07:00
parent 25c7aa7e47
commit 5155f2c8eb
2 changed files with 42 additions and 37 deletions

View File

@ -146,6 +146,25 @@ type Config struct {
// process an evaluation. This is used so that an eval that will never // process an evaluation. This is used so that an eval that will never
// complete eventually fails out of the system. // complete eventually fails out of the system.
EvalDeliveryLimit int EvalDeliveryLimit int
// MinHeartbeatTTL is the minimum time between heartbeats.
// This is used as a floor to prevent excessive updates.
MinHeartbeatTTL time.Duration
// MaxHeartbeatsPerSecond is the maximum target rate of heartbeats
// being processed per second. This allows the TTL to be increased
// to meet the target rate.
MaxHeartbeatsPerSecond float64
// HeartbeatGrace is the additional time given as a grace period
// beyond the TTL to account for network and processing delays
// as well as clock skew.
HeartbeatGrace time.Duration
// FailoverHeartbeatTTL is the TTL applied to heartbeats after
// a new leader is elected, since we no longer know the status
// of all the heartbeats.
FailoverHeartbeatTTL time.Duration
} }
// CheckVersion is used to check if the ProtocolVersion is valid // CheckVersion is used to check if the ProtocolVersion is valid
@ -168,21 +187,25 @@ func DefaultConfig() *Config {
} }
c := &Config{ c := &Config{
Region: DefaultRegion, Region: DefaultRegion,
Datacenter: DefaultDC, Datacenter: DefaultDC,
NodeName: hostname, NodeName: hostname,
ProtocolVersion: ProtocolVersionMax, ProtocolVersion: ProtocolVersionMax,
RaftConfig: raft.DefaultConfig(), RaftConfig: raft.DefaultConfig(),
RaftTimeout: 10 * time.Second, RaftTimeout: 10 * time.Second,
RPCAddr: DefaultRPCAddr, RPCAddr: DefaultRPCAddr,
SerfConfig: serf.DefaultConfig(), SerfConfig: serf.DefaultConfig(),
NumSchedulers: 1, NumSchedulers: 1,
ServerAddress: []string{"nomad.service.consul:4647"}, ServerAddress: []string{"nomad.service.consul:4647"},
ReconcileInterval: 60 * time.Second, ReconcileInterval: 60 * time.Second,
EvalGCInterval: 60 * time.Second, EvalGCInterval: 60 * time.Second,
EvalGCThreshold: 1 * time.Hour, EvalGCThreshold: 1 * time.Hour,
EvalNackTimeout: 60 * time.Second, EvalNackTimeout: 60 * time.Second,
EvalDeliveryLimit: 3, EvalDeliveryLimit: 3,
MinHeartbeatTTL: 10 * time.Second,
MaxHeartbeatsPerSecond: 50.0,
HeartbeatGrace: 10 * time.Second,
FailoverHeartbeatTTL: 300 * time.Second,
} }
// Enable all known schedulers by default // Enable all known schedulers by default

View File

@ -7,25 +7,6 @@ import (
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
) )
const (
// defaultHeartbeatTTL is the TTL applied to the heartbeat timer of every
// non-terminal node when initializeHeartbeatTimers rebuilds the timers
// after a new leader is elected. It should be longer than the usual TTL
// since clients are switching to a new leader.
defaultHeartbeatTTL = 300 * time.Second
// minHeartbeatTTL is the minimum heartbeat interval. resetHeartbeatTimer
// passes it to rateScaledInterval together with the target rate and the
// number of tracked heartbeat timers when computing a node's TTL.
minHeartbeatTTL = 10 * time.Second
// maxHeartbeatsPerSecond is the targeted maximum rate of heartbeats.
// As the cluster size grows, the heartbeat TTL is increased so that the
// aggregate heartbeat rate approaches this value.
maxHeartbeatsPerSecond = 50.0
// heartbeatGrace is the additional grace period added on top of the TTL
// when a heartbeat timer is reset, to account for various network and
// processing delays. resetHeartbeatTimer adds it only to the timer; the
// TTL it returns to the caller does not include the grace period.
heartbeatGrace = 10 * time.Second
)
// initializeHeartbeatTimers is used when a leader is newly elected to create // initializeHeartbeatTimers is used when a leader is newly elected to create
// a new map to track heartbeat expiration and to reset all the timers from // a new map to track heartbeat expiration and to reset all the timers from
// the previously known set of timers. // the previously known set of timers.
@ -55,7 +36,7 @@ func (s *Server) initializeHeartbeatTimers() error {
if node.TerminalStatus() { if node.TerminalStatus() {
continue continue
} }
s.resetHeartbeatTimerLocked(node.ID, defaultHeartbeatTTL) s.resetHeartbeatTimerLocked(node.ID, s.config.FailoverHeartbeatTTL)
} }
return nil return nil
} }
@ -68,11 +49,12 @@ func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
// Compute the target TTL value // Compute the target TTL value
n := len(s.heartbeatTimers) n := len(s.heartbeatTimers)
ttl := rateScaledInterval(maxHeartbeatsPerSecond, minHeartbeatTTL, n) ttl := rateScaledInterval(s.config.MaxHeartbeatsPerSecond,
s.config.MinHeartbeatTTL, n)
ttl += randomStagger(ttl) ttl += randomStagger(ttl)
// Reset the TTL // Reset the TTL
s.resetHeartbeatTimerLocked(id, ttl+heartbeatGrace) s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
return ttl, nil return ttl, nil
} }