Evals track blocked evals they create

This commit is contained in:
Alex Dadgar 2016-05-19 13:09:52 -07:00
parent 16353c48a7
commit 1feb57b047
7 changed files with 70 additions and 26 deletions

View file

@ -54,22 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation
// Evaluation is used to serialize an evaluation.
type Evaluation struct {
ID string
Priority int
Type string
TriggeredBy string
JobID string
JobModifyIndex uint64
NodeID string
NodeModifyIndex uint64
Status string
StatusDescription string
Wait time.Duration
NextEval string
PreviousEval string
FailedTGAllocs map[string]*AllocationMetric
CreateIndex uint64
ModifyIndex uint64
ID string
Priority int
Type string
TriggeredBy string
JobID string
JobModifyIndex uint64
NodeID string
NodeModifyIndex uint64
Status string
StatusDescription string
Wait time.Duration
NextEval string
PreviousEval string
SpawnedBlockedEval string
FailedTGAllocs map[string]*AllocationMetric
CreateIndex uint64
ModifyIndex uint64
}
// EvalIndexSort is a wrapper to sort evaluations by CreateIndex.

View file

@ -2617,6 +2617,11 @@ type Evaluation struct {
// This is used to support rolling upgrades, where we need a chain of evaluations.
PreviousEval string
// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
// blocked eval will be created if all allocations could not be placed due
// to constraints or lacking resources.
SpawnedBlockedEval string
// FailedTGAllocs are task groups which have allocations that could not be
// made, but the metrics are persisted so that the user can use the feedback
// to determine the cause.

View file

@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
}
// Retry up to the maxScheduleAttempts and reset if progress is made.
@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
if err := s.createBlockedEval(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
return mErr.ErrorOrNil()
@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
}
// Update the status to complete
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
}
// createBlockedEval creates a blocked eval and stores it.

View file

@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
t.Fatalf("expected no annotations")
}
// Ensure the eval has no spawned blocked eval
if len(h.Evals) != 1 {
t.Fatalf("bad: %#v", h.Evals)
if h.Evals[0].SpawnedBlockedEval != "" {
t.Fatalf("bad: %#v", h.Evals[0])
}
}
// Ensure the plan allocated
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
}
outEval := h.Evals[0]
// Ensure the eval has its spawned blocked eval
if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
t.Fatalf("bad: %#v", outEval)
}
// Ensure the plan failed to alloc
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
t.Fatalf("bad: %#v", outEval)
@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
t.Fatalf("bad: %#v", out)
}
// Ensure the plan failed to alloc one tg
if len(h.Evals) != 1 {
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
}
outEval := h.Evals[0]
// Ensure the eval has its spawned blocked eval
if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
t.Fatalf("bad: %#v", outEval)
}
// Ensure the plan failed to alloc one tg
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
t.Fatalf("bad: %#v", outEval)
}

View file

@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
default:
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
eval.TriggeredBy)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
}
// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
progress := func() bool { return progressMade(s.planResult) }
if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
if statusErr, ok := err.(*SetStatusError); ok {
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
}
return err
}
// Update the status to complete
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
}
// process is wrapped in retryMax to iteratively run the handler until we have no

View file

@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
}
// setStatus is used to update the status of the evaluation
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
newEval := eval.Copy()
newEval.Status = status
@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
if nextEval != nil {
newEval.NextEval = nextEval.ID
}
if spawnedBlocked != nil {
newEval.SpawnedBlockedEval = spawnedBlocked.ID
}
return planner.UpdateEval(newEval)
}

View file

@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
eval := mock.Eval()
status := "a"
desc := "b"
if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
t.Fatalf("setStatus() submited invalid eval: %v", newEval)
}
// Test next evals
h = NewHarness(t)
next := mock.Eval()
if err := setStatus(logger, h, eval, next, status, desc); err != nil {
if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
if newEval.NextEval != next.ID {
t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
}
// Test blocked evals
h = NewHarness(t)
blocked := mock.Eval()
if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
t.Fatalf("setStatus() failed: %v", err)
}
if len(h.Evals) != 1 {
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
}
newEval = h.Evals[0]
if newEval.SpawnedBlockedEval != blocked.ID {
t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
}
}
func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {