From 1feb57b0474cc4eb72579e1b47a7bf1e7e41345d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 19 May 2016 13:09:52 -0700 Subject: [PATCH] Evals track blocked evals they create --- api/evaluations.go | 33 +++++++++++++++++---------------- nomad/structs/structs.go | 5 +++++ scheduler/generic_sched.go | 6 +++--- scheduler/generic_sched_test.go | 20 +++++++++++++++++++- scheduler/system_sched.go | 6 +++--- scheduler/util.go | 5 ++++- scheduler/util_test.go | 21 +++++++++++++++++++-- 7 files changed, 70 insertions(+), 26 deletions(-) diff --git a/api/evaluations.go b/api/evaluations.go index 6d05a4da0..a10c3cba5 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -54,22 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation // Evaluation is used to serialize an evaluation. type Evaluation struct { - ID string - Priority int - Type string - TriggeredBy string - JobID string - JobModifyIndex uint64 - NodeID string - NodeModifyIndex uint64 - Status string - StatusDescription string - Wait time.Duration - NextEval string - PreviousEval string - FailedTGAllocs map[string]*AllocationMetric - CreateIndex uint64 - ModifyIndex uint64 + ID string + Priority int + Type string + TriggeredBy string + JobID string + JobModifyIndex uint64 + NodeID string + NodeModifyIndex uint64 + Status string + StatusDescription string + Wait time.Duration + NextEval string + PreviousEval string + SpawnedBlockedEval string + FailedTGAllocs map[string]*AllocationMetric + CreateIndex uint64 + ModifyIndex uint64 } // EvalIndexSort is a wrapper to sort evaluations by CreateIndex. diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 45e9e64e5..577893850 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2617,6 +2617,11 @@ type Evaluation struct { // This is used to support rolling upgrades, where we need a chain of evaluations. PreviousEval string + // SpawnedBlockedEval is the evaluation ID for a created blocked eval. A + // blocked eval will be created if all allocations could not be placed due + // to constraints or lacking resources. + SpawnedBlockedEval string + // FailedTGAllocs are task groups which have allocations that could not be // made, but the metrics are persisted so that the user can use the feedback // to determine the cause. diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 0e7d838e4..2595fbac7 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) - return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc) + return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc) } // Retry up to the maxScheduleAttempts and reset if progress is made. @@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { if err := s.createBlockedEval(); err != nil { mErr.Errors = append(mErr.Errors, err) } - if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil { + if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil { mErr.Errors = append(mErr.Errors, err) } return mErr.ErrorOrNil() @@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { } // Update the status to complete - return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "") + return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "") } // createBlockedEval creates a blocked eval and stores it. diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 09b980155..879da2a83 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("expected no annotations") } + // Ensure the eval has no spawned blocked eval + if len(h.Evals) != 1 { + t.Fatalf("bad: %#v", h.Evals) + if h.Evals[0].SpawnedBlockedEval != "" { + t.Fatalf("bad: %#v", h.Evals[0]) + } + } + // Ensure the plan allocated var planned []*structs.Allocation for _, allocList := range plan.NodeAllocation { @@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) { } outEval := h.Evals[0] + // Ensure the eval has its spawned blocked eval + if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID { + t.Fatalf("bad: %#v", outEval) + } + // Ensure the plan failed to alloc if outEval == nil || len(outEval.FailedTGAllocs) != 1 { t.Fatalf("bad: %#v", outEval) @@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) { t.Fatalf("bad: %#v", out) } - // Ensure the plan failed to alloc one tg if len(h.Evals) != 1 { t.Fatalf("incorrect number of updated eval: %#v", h.Evals) } outEval := h.Evals[0] + // Ensure the eval has its spawned blocked eval + if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID { + t.Fatalf("bad: %#v", outEval) + } + + // Ensure the plan failed to alloc one tg if outEval == nil || len(outEval.FailedTGAllocs) != 1 { t.Fatalf("bad: %#v", outEval) } diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index e5a559e23..e0bef083a 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error { default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) - return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc) + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc) } // Retry up to the maxSystemScheduleAttempts and reset if progress is made. progress := func() bool { return progressMade(s.planResult) } if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil { if statusErr, ok := err.(*SetStatusError); ok { - return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()) + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error()) } return err } // Update the status to complete - return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "") + return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "") } // process is wrapped in retryMax to iteratively run the handler until we have no diff --git a/scheduler/util.go b/scheduler/util.go index 4dc411dc9..89ad49dc9 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int { } // setStatus is used to update the status of the evaluation -func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error { +func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error { logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status) newEval := eval.Copy() newEval.Status = status @@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval if nextEval != nil { newEval.NextEval = nextEval.ID } + if spawnedBlocked != nil { + newEval.SpawnedBlockedEval = spawnedBlocked.ID + } return planner.UpdateEval(newEval) } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 1d689fe2b..73fc29f8f 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) { eval := mock.Eval() status := "a" desc := "b" - if err := setStatus(logger, h, eval, nil, status, desc); err != nil { + if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil { t.Fatalf("setStatus() failed: %v", err) } @@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) { t.Fatalf("setStatus() submited invalid eval: %v", newEval) } + // Test next evals h = NewHarness(t) next := mock.Eval() - if err := setStatus(logger, h, eval, next, status, desc); err != nil { + if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil { t.Fatalf("setStatus() failed: %v", err) } @@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) { if newEval.NextEval != next.ID { t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval) } + + // Test blocked evals + h = NewHarness(t) + blocked := mock.Eval() + if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil { + t.Fatalf("setStatus() failed: %v", err) + } + + if len(h.Evals) != 1 { + t.Fatalf("setStatus() didn't update plan: %v", h.Evals) + } + + newEval = h.Evals[0] + if newEval.SpawnedBlockedEval != blocked.ID { + t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval) + } } func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {