nomad: parameterize heartbeat configuration
This commit is contained in:
parent
25c7aa7e47
commit
5155f2c8eb
|
@ -146,6 +146,25 @@ type Config struct {
|
|||
// process an evaluation. This is used so that an eval that will never
|
||||
// complete eventually fails out of the system.
|
||||
EvalDeliveryLimit int
|
||||
|
||||
// MinHeartbeatTTL is the minimum time between heartbeats.
|
||||
// This is used as a floor to prevent excessive updates.
|
||||
MinHeartbeatTTL time.Duration
|
||||
|
||||
// MaxHeartbeatsPerSecond is the maximum target rate of heartbeats
|
||||
// being processed per second. This allows the TTL to be increased
|
||||
// to meet the target rate.
|
||||
MaxHeartbeatsPerSecond float64
|
||||
|
||||
// HeartbeatGrace is the additional time given as a grace period
|
||||
// beyond the TTL to account for network and processing delays
|
||||
// as well as clock skew.
|
||||
HeartbeatGrace time.Duration
|
||||
|
||||
// FailoverHeartbeatTTL is the TTL applied to heartbeats after
|
||||
// a new leader is elected, since we no longer know the status
|
||||
// of all the heartbeats.
|
||||
FailoverHeartbeatTTL time.Duration
|
||||
}
|
||||
|
||||
// CheckVersion is used to check if the ProtocolVersion is valid
|
||||
|
@ -168,21 +187,25 @@ func DefaultConfig() *Config {
|
|||
}
|
||||
|
||||
c := &Config{
|
||||
Region: DefaultRegion,
|
||||
Datacenter: DefaultDC,
|
||||
NodeName: hostname,
|
||||
ProtocolVersion: ProtocolVersionMax,
|
||||
RaftConfig: raft.DefaultConfig(),
|
||||
RaftTimeout: 10 * time.Second,
|
||||
RPCAddr: DefaultRPCAddr,
|
||||
SerfConfig: serf.DefaultConfig(),
|
||||
NumSchedulers: 1,
|
||||
ServerAddress: []string{"nomad.service.consul:4647"},
|
||||
ReconcileInterval: 60 * time.Second,
|
||||
EvalGCInterval: 60 * time.Second,
|
||||
EvalGCThreshold: 1 * time.Hour,
|
||||
EvalNackTimeout: 60 * time.Second,
|
||||
EvalDeliveryLimit: 3,
|
||||
Region: DefaultRegion,
|
||||
Datacenter: DefaultDC,
|
||||
NodeName: hostname,
|
||||
ProtocolVersion: ProtocolVersionMax,
|
||||
RaftConfig: raft.DefaultConfig(),
|
||||
RaftTimeout: 10 * time.Second,
|
||||
RPCAddr: DefaultRPCAddr,
|
||||
SerfConfig: serf.DefaultConfig(),
|
||||
NumSchedulers: 1,
|
||||
ServerAddress: []string{"nomad.service.consul:4647"},
|
||||
ReconcileInterval: 60 * time.Second,
|
||||
EvalGCInterval: 60 * time.Second,
|
||||
EvalGCThreshold: 1 * time.Hour,
|
||||
EvalNackTimeout: 60 * time.Second,
|
||||
EvalDeliveryLimit: 3,
|
||||
MinHeartbeatTTL: 10 * time.Second,
|
||||
MaxHeartbeatsPerSecond: 50.0,
|
||||
HeartbeatGrace: 10 * time.Second,
|
||||
FailoverHeartbeatTTL: 300 * time.Second,
|
||||
}
|
||||
|
||||
// Enable all known schedulers by default
|
||||
|
|
|
@ -7,25 +7,6 @@ import (
|
|||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// defaultHeartbeatTTL is the TTL value used for heartbeats
|
||||
// when they are first initialized. This should be longer than
|
||||
// the usual TTL since clients are switching to a new leader.
|
||||
defaultHeartbeatTTL = 300 * time.Second
|
||||
|
||||
// minHeartbeatTTL is the minimum heartbeat interval.
|
||||
minHeartbeatTTL = 10 * time.Second
|
||||
|
||||
// maxHeartbeatsPerSecond is the targeted maximum rate of heartbeats.
|
||||
// As the cluster size grows, we simply increase the heartbeat TTL
|
||||
// to approach this value.
|
||||
maxHeartbeatsPerSecond = 50.0
|
||||
|
||||
// heartbeatGrace is the additional time given to the TTL period
|
||||
// as a grace. This is to account for various network and processing delays.
|
||||
heartbeatGrace = 10 * time.Second
|
||||
)
|
||||
|
||||
// initializeHeartbeatTimers is used when a leader is newly elected to create
|
||||
// a new map to track heartbeat expiration and to reset all the timers from
|
||||
// the previously known set of timers.
|
||||
|
@ -55,7 +36,7 @@ func (s *Server) initializeHeartbeatTimers() error {
|
|||
if node.TerminalStatus() {
|
||||
continue
|
||||
}
|
||||
s.resetHeartbeatTimerLocked(node.ID, defaultHeartbeatTTL)
|
||||
s.resetHeartbeatTimerLocked(node.ID, s.config.FailoverHeartbeatTTL)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -68,11 +49,12 @@ func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
|
|||
|
||||
// Compute the target TTL value
|
||||
n := len(s.heartbeatTimers)
|
||||
ttl := rateScaledInterval(maxHeartbeatsPerSecond, minHeartbeatTTL, n)
|
||||
ttl := rateScaledInterval(s.config.MaxHeartbeatsPerSecond,
|
||||
s.config.MinHeartbeatTTL, n)
|
||||
ttl += randomStagger(ttl)
|
||||
|
||||
// Reset the TTL
|
||||
s.resetHeartbeatTimerLocked(id, ttl+heartbeatGrace)
|
||||
s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
|
||||
return ttl, nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue