From 1feb57b0474cc4eb72579e1b47a7bf1e7e41345d Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Thu, 19 May 2016 13:09:52 -0700
Subject: [PATCH] Evals track blocked evals they create

---
 api/evaluations.go              | 33 +++++++++++++++++----------------
 nomad/structs/structs.go        |  5 +++++
 scheduler/generic_sched.go      |  6 +++---
 scheduler/generic_sched_test.go | 20 +++++++++++++++++++-
 scheduler/system_sched.go       |  6 +++---
 scheduler/util.go               |  5 ++++-
 scheduler/util_test.go          | 21 +++++++++++++++++++--
 7 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/api/evaluations.go b/api/evaluations.go
index 6d05a4da0..a10c3cba5 100644
--- a/api/evaluations.go
+++ b/api/evaluations.go
@@ -54,22 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation
 
 // Evaluation is used to serialize an evaluation.
 type Evaluation struct {
-	ID                string
-	Priority          int
-	Type              string
-	TriggeredBy       string
-	JobID             string
-	JobModifyIndex    uint64
-	NodeID            string
-	NodeModifyIndex   uint64
-	Status            string
-	StatusDescription string
-	Wait              time.Duration
-	NextEval          string
-	PreviousEval      string
-	FailedTGAllocs    map[string]*AllocationMetric
-	CreateIndex       uint64
-	ModifyIndex       uint64
+	ID                 string
+	Priority           int
+	Type               string
+	TriggeredBy        string
+	JobID              string
+	JobModifyIndex     uint64
+	NodeID             string
+	NodeModifyIndex    uint64
+	Status             string
+	StatusDescription  string
+	Wait               time.Duration
+	NextEval           string
+	PreviousEval       string
+	SpawnedBlockedEval string
+	FailedTGAllocs     map[string]*AllocationMetric
+	CreateIndex        uint64
+	ModifyIndex        uint64
 }
 
 // EvalIndexSort is a wrapper to sort evaluations by CreateIndex.
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 45e9e64e5..577893850 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -2617,6 +2617,11 @@ type Evaluation struct {
 	// This is used to support rolling upgrades, where we need a chain of evaluations.
 	PreviousEval string
 
+	// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
+	// blocked eval will be created if all allocations could not be placed due
+	// to constraints or lacking resources.
+	SpawnedBlockedEval string
+
 	// FailedTGAllocs are task groups which have allocations that could not be
 	// made, but the metrics are persisted so that the user can use the feedback
 	// to determine the cause.
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 0e7d838e4..2595fbac7 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
 	}
 
 	// Retry up to the maxScheduleAttempts and reset if progress is made.
@@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 			if err := s.createBlockedEval(); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
-			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
+			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
 			return mErr.ErrorOrNil()
@@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	}
 
 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
 }
 
 // createBlockedEval creates a blocked eval and stores it.
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 09b980155..879da2a83 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
 		t.Fatalf("expected no annotations")
 	}
 
+	// Ensure the eval has no spawned blocked eval
+	if len(h.Evals) != 1 {
+		t.Fatalf("bad: %#v", h.Evals)
+		if h.Evals[0].SpawnedBlockedEval != "" {
+			t.Fatalf("bad: %#v", h.Evals[0])
+		}
+	}
+
 	// Ensure the plan allocated
 	var planned []*structs.Allocation
 	for _, allocList := range plan.NodeAllocation {
@@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 	}
 	outEval := h.Evals[0]
 
+	// Ensure the eval has its spawned blocked eval
+	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
 	// Ensure the plan failed to alloc
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
@@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
 		t.Fatalf("bad: %#v", out)
 	}
 
-	// Ensure the plan failed to alloc one tg
 	if len(h.Evals) != 1 {
 		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
 	}
 	outEval := h.Evals[0]
 
+	// Ensure the eval has its spawned blocked eval
+	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
+	// Ensure the plan failed to alloc one tg
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
 	}
diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go
index e5a559e23..e0bef083a 100644
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
 	}
 
 	// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
 	progress := func() bool { return progressMade(s.planResult) }
 	if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
 		if statusErr, ok := err.(*SetStatusError); ok {
-			return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
+			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
 		}
 		return err
 	}
 
 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
 }
 
 // process is wrapped in retryMax to iteratively run the handler until we have no
diff --git a/scheduler/util.go b/scheduler/util.go
index 4dc411dc9..89ad49dc9 100644
--- a/scheduler/util.go
+++ b/scheduler/util.go
@@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
 }
 
 // setStatus is used to update the status of the evaluation
-func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
+func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
 	logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
 	newEval := eval.Copy()
 	newEval.Status = status
@@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
 	if nextEval != nil {
 		newEval.NextEval = nextEval.ID
 	}
+	if spawnedBlocked != nil {
+		newEval.SpawnedBlockedEval = spawnedBlocked.ID
+	}
 	return planner.UpdateEval(newEval)
 }
 
diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index 1d689fe2b..73fc29f8f 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
 	eval := mock.Eval()
 	status := "a"
 	desc := "b"
-	if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}
 
@@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
 		t.Fatalf("setStatus() submited invalid eval: %v", newEval)
 	}
 
+	// Test next evals
 	h = NewHarness(t)
 	next := mock.Eval()
-	if err := setStatus(logger, h, eval, next, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}
 
@@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
 	if newEval.NextEval != next.ID {
 		t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
 	}
+
+	// Test blocked evals
+	h = NewHarness(t)
+	blocked := mock.Eval()
+	if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
+		t.Fatalf("setStatus() failed: %v", err)
+	}
+
+	if len(h.Evals) != 1 {
+		t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
+	}
+
+	newEval = h.Evals[0]
+	if newEval.SpawnedBlockedEval != blocked.ID {
+		t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
+	}
 }
 
 func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {