Merge pull request #839 from hashicorp/f-refreshindex
nomad: cleanup handling of refresh index
commit e6ba72556a
nomad/plan_apply.go

@@ -187,6 +187,14 @@ func (s *Server) asyncPlanWait(waitCh chan struct{}, future raft.ApplyFuture,
 	// Respond to the plan
 	result.AllocIndex = future.Index()
+
+	// If this is a partial plan application, we need to ensure the scheduler
+	// at least has visibility into any placements it made to avoid double placement.
+	// The RefreshIndex computed by evaluatePlan may be stale due to evaluation
+	// against an optimistic copy of the state.
+	if result.RefreshIndex != 0 {
+		result.RefreshIndex = maxUint64(result.RefreshIndex, result.AllocIndex)
+	}
 	pending.respond(result, nil)
 }
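The added block relies on a maxUint64 helper that is defined elsewhere in the nomad package and does not appear in this diff. As a point of reference, here is a minimal runnable sketch of that contract; the definition below is an illustration, not the package's actual source.

package main

import "fmt"

// maxUint64 illustrates the contract the hunk above depends on: it returns
// the larger of two uint64 values. The real helper lives elsewhere in the
// nomad package; this sketch only mirrors its behavior.
func maxUint64(a, b uint64) uint64 {
	if a >= b {
		return a
	}
	return b
}

func main() {
	// A RefreshIndex of 1000 computed against a stale snapshot is raised to
	// the plan's AllocIndex of 1001.
	fmt.Println(maxUint64(1000, 1001)) // 1001
}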
@@ -215,6 +223,7 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P
 	// Setup a multierror to handle potentially getting many
 	// errors since we are processing in parallel.
 	var mErr multierror.Error
+	partialCommit := false
 
 	// handleResult is used to process the result of evaluateNodePlan
 	handleResult := func(nodeID string, fit bool, err error) (cancel bool) {
@@ -224,19 +233,8 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P
 			return true
 		}
 		if !fit {
-			// Scheduler must have stale data, RefreshIndex should force
-			// the latest view of allocations and nodes
-			allocIndex, err := snap.Index("allocs")
-			if err != nil {
-				mErr.Errors = append(mErr.Errors, err)
-				return true
-			}
-			nodeIndex, err := snap.Index("nodes")
-			if err != nil {
-				mErr.Errors = append(mErr.Errors, err)
-				return true
-			}
-			result.RefreshIndex = maxUint64(nodeIndex, allocIndex)
+			// Set that this is a partial commit
+			partialCommit = true
 
 			// If we require all-at-once scheduling, there is no point
 			// to continue the evaluation, as we've already failed.
@@ -294,6 +292,21 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P
 		}
 		outstanding--
 	}
+
+	// If the plan resulted in a partial commit, we need to determine
+	// a minimum refresh index to force the scheduler to work on a more
+	// up-to-date state to avoid the failures.
+	if partialCommit {
+		allocIndex, err := snap.Index("allocs")
+		if err != nil {
+			mErr.Errors = append(mErr.Errors, err)
+		}
+		nodeIndex, err := snap.Index("nodes")
+		if err != nil {
+			mErr.Errors = append(mErr.Errors, err)
+		}
+		result.RefreshIndex = maxUint64(nodeIndex, allocIndex)
+	}
 	return result, mErr.ErrorOrNil()
 }
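Taken together, the evaluatePlan hunks replace the per-node refresh-index computation with a single partialCommit flag and one post-processing step. The following self-contained sketch condenses that end-of-evaluation logic; indexSource, fakeSnap, and refreshIndex are stand-ins invented for illustration, not the real state.StateSnapshot API or the actual evaluatePlan code.

package main

import "fmt"

// indexSource stands in for the two snapshot lookups the diff performs via
// snap.Index("allocs") and snap.Index("nodes").
type indexSource interface {
	Index(table string) (uint64, error)
}

// fakeSnap is a toy snapshot keyed by table name, used only to keep the
// sketch runnable.
type fakeSnap map[string]uint64

func (f fakeSnap) Index(table string) (uint64, error) { return f[table], nil }

// refreshIndex mirrors the new post-evaluation step: only when the plan was
// partially committed is a minimum refresh index computed, taken as the
// larger of the allocs and nodes table indexes.
func refreshIndex(partialCommit bool, snap indexSource) (uint64, error) {
	if !partialCommit {
		return 0, nil
	}
	allocIndex, err := snap.Index("allocs")
	if err != nil {
		return 0, err
	}
	nodeIndex, err := snap.Index("nodes")
	if err != nil {
		return 0, err
	}
	if nodeIndex > allocIndex {
		return nodeIndex, nil
	}
	return allocIndex, nil
}

func main() {
	snap := fakeSnap{"allocs": 1000, "nodes": 1001}
	idx, err := refreshIndex(true, snap)
	if err != nil {
		fmt.Println("err:", err)
		return
	}
	fmt.Println(idx) // 1001
}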
nomad/plan_apply_test.go

@@ -236,6 +236,9 @@ func TestPlanApply_EvalPlan_Partial(t *testing.T) {
 	if _, ok := result.NodeAllocation[node2.ID]; ok {
 		t.Fatalf("should not allow alloc2")
 	}
+	if result.RefreshIndex != 1001 {
+		t.Fatalf("bad: %d", result.RefreshIndex)
+	}
 }
 
 func TestPlanApply_EvalPlan_Partial_AllAtOnce(t *testing.T) {
@@ -271,6 +274,9 @@ func TestPlanApply_EvalPlan_Partial_AllAtOnce(t *testing.T) {
 	if len(result.NodeAllocation) != 0 {
 		t.Fatalf("should not alloc: %v", result.NodeAllocation)
 	}
+	if result.RefreshIndex != 1001 {
+		t.Fatalf("bad: %d", result.RefreshIndex)
+	}
 }
 
 func TestPlanApply_EvalNodePlan_Simple(t *testing.T) {
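Both tests now assert a RefreshIndex of 1001 after a partial plan. A standalone go test sketch of the same assertion shape, written against the illustrative maxUint64 helper from above rather than the real evaluatePlan (the package and test names here are hypothetical):

package example

import "testing"

// maxUint64 is repeated from the earlier sketch so this _test.go file
// compiles on its own.
func maxUint64(a, b uint64) uint64 {
	if a >= b {
		return a
	}
	return b
}

// TestRefreshIndexFloor mirrors the shape of the new assertions: merging a
// stale refresh index (1000) with a newer index (1001) must yield 1001.
func TestRefreshIndexFloor(t *testing.T) {
	if got := maxUint64(1000, 1001); got != 1001 {
		t.Fatalf("bad: %d", got)
	}
}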