Evals track blocked evals they create
This commit is contained in:
parent
16353c48a7
commit
1feb57b047
|
@ -67,6 +67,7 @@ type Evaluation struct {
|
|||
Wait time.Duration
|
||||
NextEval string
|
||||
PreviousEval string
|
||||
SpawnedBlockedEval string
|
||||
FailedTGAllocs map[string]*AllocationMetric
|
||||
CreateIndex uint64
|
||||
ModifyIndex uint64
|
||||
|
|
|
@ -2617,6 +2617,11 @@ type Evaluation struct {
|
|||
// This is used to support rolling upgrades, where we need a chain of evaluations.
|
||||
PreviousEval string
|
||||
|
||||
// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
|
||||
// blocked eval will be created if not all allocations could be placed due
|
||||
// to constraints or lacking resources.
|
||||
SpawnedBlockedEval string
|
||||
|
||||
// FailedTGAllocs are task groups which have allocations that could not be
|
||||
// made, but the metrics are persisted so that the user can use the feedback
|
||||
// to determine the cause.
|
||||
|
|
|
@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
|||
default:
|
||||
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
|
||||
eval.TriggeredBy)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
|
||||
}
|
||||
|
||||
// Retry up to the maxScheduleAttempts and reset if progress is made.
|
||||
|
@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
|||
if err := s.createBlockedEval(); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
}
|
||||
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
|
||||
if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
}
|
||||
return mErr.ErrorOrNil()
|
||||
|
@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
|
|||
}
|
||||
|
||||
// Update the status to complete
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
|
||||
}
|
||||
|
||||
// createBlockedEval creates a blocked eval and stores it.
|
||||
|
|
|
@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
|
|||
t.Fatalf("expected no annotations")
|
||||
}
|
||||
|
||||
// Ensure the eval has no spawned blocked eval
|
||||
if len(h.Evals) != 1 {
|
||||
t.Fatalf("bad: %#v", h.Evals)
|
||||
if h.Evals[0].SpawnedBlockedEval != "" {
|
||||
t.Fatalf("bad: %#v", h.Evals[0])
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the plan allocated
|
||||
var planned []*structs.Allocation
|
||||
for _, allocList := range plan.NodeAllocation {
|
||||
|
@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
|
|||
}
|
||||
outEval := h.Evals[0]
|
||||
|
||||
// Ensure the eval has its spawned blocked eval
|
||||
if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
|
||||
t.Fatalf("bad: %#v", outEval)
|
||||
}
|
||||
|
||||
// Ensure the plan failed to alloc
|
||||
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
|
||||
t.Fatalf("bad: %#v", outEval)
|
||||
|
@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
|
|||
t.Fatalf("bad: %#v", out)
|
||||
}
|
||||
|
||||
// Ensure the plan failed to alloc one tg
|
||||
if len(h.Evals) != 1 {
|
||||
t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
|
||||
}
|
||||
outEval := h.Evals[0]
|
||||
|
||||
// Ensure the eval has its spawned blocked eval
|
||||
if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
|
||||
t.Fatalf("bad: %#v", outEval)
|
||||
}
|
||||
|
||||
// Ensure the plan failed to alloc one tg
|
||||
if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
|
||||
t.Fatalf("bad: %#v", outEval)
|
||||
}
|
||||
|
|
|
@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
|
|||
default:
|
||||
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
|
||||
eval.TriggeredBy)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
|
||||
}
|
||||
|
||||
// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
|
||||
progress := func() bool { return progressMade(s.planResult) }
|
||||
if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
|
||||
if statusErr, ok := err.(*SetStatusError); ok {
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Update the status to complete
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
|
||||
}
|
||||
|
||||
// process is wrapped in retryMax to iteratively run the handler until we have no
|
||||
|
|
|
@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
|
|||
}
|
||||
|
||||
// setStatus is used to update the status of the evaluation
|
||||
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
|
||||
func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
|
||||
logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
|
||||
newEval := eval.Copy()
|
||||
newEval.Status = status
|
||||
|
@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
|
|||
if nextEval != nil {
|
||||
newEval.NextEval = nextEval.ID
|
||||
}
|
||||
if spawnedBlocked != nil {
|
||||
newEval.SpawnedBlockedEval = spawnedBlocked.ID
|
||||
}
|
||||
return planner.UpdateEval(newEval)
|
||||
}
|
||||
|
||||
|
|
|
@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
|
|||
eval := mock.Eval()
|
||||
status := "a"
|
||||
desc := "b"
|
||||
if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
|
||||
if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
|
||||
t.Fatalf("setStatus() failed: %v", err)
|
||||
}
|
||||
|
||||
|
@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
|
|||
t.Fatalf("setStatus() submited invalid eval: %v", newEval)
|
||||
}
|
||||
|
||||
// Test next evals
|
||||
h = NewHarness(t)
|
||||
next := mock.Eval()
|
||||
if err := setStatus(logger, h, eval, next, status, desc); err != nil {
|
||||
if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
|
||||
t.Fatalf("setStatus() failed: %v", err)
|
||||
}
|
||||
|
||||
|
@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
|
|||
if newEval.NextEval != next.ID {
|
||||
t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
|
||||
}
|
||||
|
||||
// Test blocked evals
|
||||
h = NewHarness(t)
|
||||
blocked := mock.Eval()
|
||||
if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
|
||||
t.Fatalf("setStatus() failed: %v", err)
|
||||
}
|
||||
|
||||
if len(h.Evals) != 1 {
|
||||
t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
|
||||
}
|
||||
|
||||
newEval = h.Evals[0]
|
||||
if newEval.SpawnedBlockedEval != blocked.ID {
|
||||
t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {
|
||||
|
|
Loading…
Reference in a new issue