Evals track blocked evals they create

2016-05-19 13:09:52 -07:00 · 2016-05-19 13:09:52 -07:00 · 1feb57b047
parent 16353c48a7
commit 1feb57b047
7 changed files with 70 additions and 26 deletions
--- a/api/evaluations.go
+++ b/api/evaluations.go
@ -54,22 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation

 // Evaluation is used to serialize an evaluation.
 type Evaluation struct {
-	ID                string
-	Priority          int
-	Type              string
-	TriggeredBy       string
-	JobID             string
-	JobModifyIndex    uint64
-	NodeID            string
-	NodeModifyIndex   uint64
-	Status            string
-	StatusDescription string
-	Wait              time.Duration
-	NextEval          string
-	PreviousEval      string
-	FailedTGAllocs    map[string]*AllocationMetric
-	CreateIndex       uint64
-	ModifyIndex       uint64
+	ID                 string
+	Priority           int
+	Type               string
+	TriggeredBy        string
+	JobID              string
+	JobModifyIndex     uint64
+	NodeID             string
+	NodeModifyIndex    uint64
+	Status             string
+	StatusDescription  string
+	Wait               time.Duration
+	NextEval           string
+	PreviousEval       string
+	SpawnedBlockedEval string // ID of the blocked evaluation created by this evaluation, if any
+	FailedTGAllocs     map[string]*AllocationMetric
+	CreateIndex        uint64
+	ModifyIndex        uint64
 }

 // EvalIndexSort is a wrapper to sort evaluations by CreateIndex.
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@ -2617,6 +2617,11 @@ type Evaluation struct {
 	// This is used to support rolling upgrades, where we need a chain of evaluations.
 	PreviousEval string

+	// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
+	// blocked eval will be created if all allocations could not be placed due
+	// to constraints or lacking resources.
+	SpawnedBlockedEval string
+
 	// FailedTGAllocs are task groups which have allocations that could not be
 	// made, but the metrics are persisted so that the user can use the feedback
 	// to determine the cause.
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
 	}

 	// Retry up to the maxScheduleAttempts and reset if progress is made.
@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 			if err := s.createBlockedEval(); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
-			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
+			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
 			return mErr.ErrorOrNil()
@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	}

 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
 }

 // createBlockedEval creates a blocked eval and stores it.
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
 		t.Fatalf("expected no annotations")
 	}

+	// Ensure exactly one eval was updated and it has no spawned blocked eval
+	if len(h.Evals) != 1 {
+		t.Fatalf("bad: %#v", h.Evals)
+	}
+	if h.Evals[0].SpawnedBlockedEval != "" {
+		t.Fatalf("bad: %#v", h.Evals[0])
+	}
+
 	// Ensure the plan allocated
 	var planned []*structs.Allocation
 	for _, allocList := range plan.NodeAllocation {
@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 	}
 	outEval := h.Evals[0]

+	// Ensure the eval has its spawned blocked eval (guard nil/empty before dereferencing)
+	if outEval == nil || len(h.CreateEvals) == 0 || outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
 	// Ensure the plan failed to alloc
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
 		t.Fatalf("bad: %#v", out)
 	}

-	// Ensure the plan failed to alloc one tg
 	if len(h.Evals) != 1 {
 		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
 	}
 	outEval := h.Evals[0]

+	// Ensure the eval has its spawned blocked eval (guard nil/empty before dereferencing)
+	if outEval == nil || len(h.CreateEvals) == 0 || outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
+	// Ensure the plan failed to alloc one tg
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
 	}
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
 	}

 	// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
 	progress := func() bool { return progressMade(s.planResult) }
 	if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
 		if statusErr, ok := err.(*SetStatusError); ok {
-			return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
+			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
 		}
 		return err
 	}

 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
 }

 // process is wrapped in retryMax to iteratively run the handler until we have no
--- a/scheduler/util.go
+++ b/scheduler/util.go
@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
 }

 // setStatus is used to update the status of the evaluation
-func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
+func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
 	logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
 	newEval := eval.Copy()
 	newEval.Status = status
@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
 	if nextEval != nil {
 		newEval.NextEval = nextEval.ID
 	}
+	if spawnedBlocked != nil {
+		newEval.SpawnedBlockedEval = spawnedBlocked.ID
+	}
 	return planner.UpdateEval(newEval)
 }

--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
 	eval := mock.Eval()
 	status := "a"
 	desc := "b"
-	if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}

@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
 		t.Fatalf("setStatus() submited invalid eval: %v", newEval)
 	}

+	// Test next evals
 	h = NewHarness(t)
 	next := mock.Eval()
-	if err := setStatus(logger, h, eval, next, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}

@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
 	if newEval.NextEval != next.ID {
 		t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
 	}
+
+	// Test blocked evals
+	h = NewHarness(t)
+	blocked := mock.Eval()
+	if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
+		t.Fatalf("setStatus() failed: %v", err)
+	}
+
+	if len(h.Evals) != 1 {
+		t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
+	}
+
+	newEval = h.Evals[0]
+	if newEval.SpawnedBlockedEval != blocked.ID {
+		t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
+	}
 }

 func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {