From d44c4761f6e9f745d2dfd18f5da546aae8fd5080 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Wed, 15 Jun 2016 12:58:19 -0700
Subject: [PATCH] track failed allocations properly

---
 scheduler/system_sched.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go
index 42f509b39..ab135c63f 100644
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@@ -36,6 +36,8 @@ type SystemScheduler struct {
 
 	limitReached bool
 	nextEval     *structs.Evaluation
+
+	failedTGAllocs map[string]*structs.AllocMetric
 }
 
 // NewSystemScheduler is a factory function to instantiate a new system
@@ -60,20 +62,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, nil, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusFailed, desc)
 	}
 
 	// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
 	progress := func() bool { return progressMade(s.planResult) }
 	if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
 		if statusErr, ok := err.(*SetStatusError); ok {
-			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, nil, statusErr.EvalStatus, err.Error())
+			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, statusErr.EvalStatus, err.Error())
 		}
 		return err
 	}
 
 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, nil, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusComplete, "")
 }
 
 // process is wrapped in retryMax to iteratively run the handler until we have no
@@ -99,7 +101,7 @@ func (s *SystemScheduler) process() (bool, error) {
 	s.plan = s.eval.MakePlan(s.job)
 
 	// Reset the failed allocations
-	s.eval.FailedTGAllocs = nil
+	s.failedTGAllocs = nil
 
 	// Create an evaluation context
 	s.ctx = NewEvalContext(s.state, s.plan, s.logger)
@@ -239,7 +241,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 
 		if option == nil {
 			// Check if this task group has already failed
-			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
+			if metric, ok := s.failedTGAllocs[missing.TaskGroup.Name]; ok {
 				metric.CoalescedFailures += 1
 				continue
 			}
@@ -267,11 +269,11 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 			s.plan.AppendAlloc(alloc)
 		} else {
 			// Lazy initialize the failed map
-			if s.eval.FailedTGAllocs == nil {
-				s.eval.FailedTGAllocs = make(map[string]*structs.AllocMetric)
+			if s.failedTGAllocs == nil {
+				s.failedTGAllocs = make(map[string]*structs.AllocMetric)
 			}
 
-			s.eval.FailedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
+			s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
 		}
 	}