Evals track blocked evals they create

This commit is contained in:
Alex Dadgar 2016-05-19 13:09:52 -07:00
parent 16353c48a7
commit 1feb57b047
7 changed files with 70 additions and 26 deletions

View file

@ -67,6 +67,7 @@ type Evaluation struct {
Wait time.Duration
NextEval string
PreviousEval string
SpawnedBlockedEval string
FailedTGAllocs map[string]*AllocationMetric
CreateIndex uint64
ModifyIndex uint64

View file

@ -2617,6 +2617,11 @@ type Evaluation struct {
// This is used to support rolling upgrades, where we need a chain of evaluations.
PreviousEval string
// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
// blocked eval will be created if any allocation could not be placed due
// to constraints or lacking resources.
SpawnedBlockedEval string
// FailedTGAllocs are task groups which have allocations that could not be
// made, but the metrics are persisted so that the user can use the feedback
// to determine the cause.

View file

@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
}
// Retry up to the maxScheduleAttempts and reset if progress is made.
@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
if err := s.createBlockedEval(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
return mErr.ErrorOrNil()
@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
}
// Update the status to complete
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
}
// createBlockedEval creates a blocked eval and stores it.

View file

@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
t.Fatalf("expected no annotations")
}
// Ensure exactly one eval was updated and that it spawned no blocked eval.
// The two assertions are checked separately so the SpawnedBlockedEval check
// runs whenever the length check passes (t.Fatalf stops the test).
if len(h.Evals) != 1 {
	t.Fatalf("bad: %#v", h.Evals)
}
if h.Evals[0].SpawnedBlockedEval != "" {
	t.Fatalf("bad: %#v", h.Evals[0])
}
// Ensure the plan allocated
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
}
outEval := h.Evals[0]
// Ensure the eval has its spawned blocked eval. The nil eval and empty
// CreateEvals cases are guarded first so a regression fails the test
// cleanly instead of panicking on the field read or the index.
if outEval == nil {
	t.Fatalf("bad: %#v", outEval)
}
if len(h.CreateEvals) == 0 || outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
	t.Fatalf("bad: %#v", outEval)
}
// Ensure the plan failed to alloc
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
t.Fatalf("bad: %#v", outEval)
@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
t.Fatalf("bad: %#v", out)
}
// Ensure the plan failed to alloc one tg
if len(h.Evals) != 1 {
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
}
outEval := h.Evals[0]
// Ensure the plan failed to alloc one tg. This runs first because it also
// rejects a nil eval before any of its fields are read.
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
	t.Fatalf("bad: %#v", outEval)
}
// Ensure the eval has its spawned blocked eval; the length guard keeps a
// missing blocked eval from panicking on the index.
if len(h.CreateEvals) == 0 || outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
	t.Fatalf("bad: %#v", outEval)
}

View file

@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
}
// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
progress := func() bool { return progressMade(s.planResult) }
if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
if statusErr, ok := err.(*SetStatusError); ok {
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
}
return err
}
// Update the status to complete
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
}
// process is wrapped in retryMax to iteratively run the handler until we have no

View file

@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
}
// setStatus is used to update the status of the evaluation
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
newEval := eval.Copy()
newEval.Status = status
@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
if nextEval != nil {
newEval.NextEval = nextEval.ID
}
if spawnedBlocked != nil {
newEval.SpawnedBlockedEval = spawnedBlocked.ID
}
return planner.UpdateEval(newEval)
}

View file

@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
eval := mock.Eval()
status := "a"
desc := "b"
if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
t.Fatalf("setStatus() submited invalid eval: %v", newEval)
}
// Test next evals
h = NewHarness(t)
next := mock.Eval()
if err := setStatus(logger, h, eval, next, status, desc); err != nil {
if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
if newEval.NextEval != next.ID {
t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
}
// Test blocked evals
h = NewHarness(t)
blocked := mock.Eval()
if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
if len(h.Evals) != 1 {
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
}
newEval = h.Evals[0]
if newEval.SpawnedBlockedEval != blocked.ID {
t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
}
}
func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {