nomad: add eval broker, configurable nack timeout

This commit is contained in:
Armon Dadgar 2015-07-23 21:44:17 -07:00
parent 5fcc39c399
commit fc11808fe9
4 changed files with 22 additions and 12 deletions

View File

@ -107,6 +107,13 @@ type Config struct {
// that are force removed, as well as intermittent unavailability during
// leader election.
ReconcileInterval time.Duration
// EvalNackTimeout controls how long we allow a sub-scheduler to
// work on an evaluation before we consider it failed and Nack it.
// This allows that evaluation to be handed to another sub-scheduler
// to work on. Defaults to 60 seconds. This should be long enough that
// no evaluation hits it unless the sub-scheduler has failed.
EvalNackTimeout time.Duration
}
// CheckVersion is used to check if the ProtocolVersion is valid
@ -137,6 +144,7 @@ func DefaultConfig() *Config {
RPCAddr: DefaultRPCAddr,
SerfConfig: serf.DefaultConfig(),
ReconcileInterval: 60 * time.Second,
EvalNackTimeout: 60 * time.Second,
}
// Increase our reap interval to 3 days instead of 24h.

View File

@ -10,14 +10,6 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// DefaultNackTimeout is the default amount of time between when
// a message is dequeued and no Ack/Nack is received before it is
// assumed failed and Nack'd automatically. This is used to recover
// from the failure of a sub-scheduler.
DefaultNackTimeout = 60 * time.Second
)
// EvalBroker is used to manage brokering of evaluations. When an evaluation is
// created, due to a change in a job specification or a node, we put it into the
// broker. The broker sorts by evaluations by priority and scheduler type. This

View File

@ -16,7 +16,7 @@ var (
func testBroker(t *testing.T, timeout time.Duration) *EvalBroker {
if timeout == 0 {
timeout = DefaultNackTimeout
timeout = 5 * time.Second
}
b, err := NewEvalBroker(timeout)
if err != nil {
@ -246,9 +246,9 @@ func TestEvalBroker_Dequeue_Fairness(t *testing.T) {
counter -= 1
}
// The odds are less than 1/1024 that
// The odds are less than 1/65536 that
// we see the same sequence 10 times in a row
if counter >= 10 || counter <= -10 {
if counter >= 16 || counter <= -16 {
t.Fatalf("unlikely sequence: %d", counter)
}
}

View File

@ -91,6 +91,10 @@ type Server struct {
// eventCh is used to receive events from the serf cluster
eventCh chan serf.Event
// evalBroker is used to manage the in-progress evaluations
// that are waiting to be brokered to a sub-scheduler
evalBroker *EvalBroker
left bool
shutdown bool
shutdownCh chan struct{}
@ -121,6 +125,12 @@ func NewServer(config *Config) (*Server, error) {
// Create a logger
logger := log.New(config.LogOutput, "", log.LstdFlags)
// Create an eval broker
evalBroker, err := NewEvalBroker(config.EvalNackTimeout)
if err != nil {
return nil, err
}
// Create the server
s := &Server{
config: config,
@ -131,6 +141,7 @@ func NewServer(config *Config) (*Server, error) {
localPeers: make(map[string]*serverParts),
reconcileCh: make(chan serf.Member, 32),
eventCh: make(chan serf.Event, 256),
evalBroker: evalBroker,
shutdownCh: make(chan struct{}),
}
@ -148,7 +159,6 @@ func NewServer(config *Config) (*Server, error) {
}
// Initialize the wan Serf
var err error
s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
if err != nil {
s.Shutdown()