From 5e309f3f33d12c1646fa37db06363ddfdb709c0a Mon Sep 17 00:00:00 2001
From: Derek Strickland <1111455+DerekStrickland@users.noreply.github.com>
Date: Thu, 21 Apr 2022 10:05:58 -0400
Subject: [PATCH] reconciler: Handle canaries when client disconnects (#12539)

* plan_apply: Allow node updates in disconnected node plans
* plan: Keep the job when persisting unknown allocs
* reconciler: stop unknown allocs when stopping all
* reconcile_util: reorder filtering to handle canaries; skip rescheduling unknown
* heartbeat: Fix bug in node heartbeating
---
 nomad/heartbeat.go               |   5 +-
 nomad/node_endpoint.go           |   3 +-
 nomad/plan_apply.go              |   4 -
 nomad/structs/structs.go         |   2 -
 scheduler/reconcile.go           |   7 +-
 scheduler/reconcile_test.go      | 364 +++++++++++++++++++++++++++++++
 scheduler/reconcile_util.go      |  89 +++++---
 scheduler/reconcile_util_test.go | 351 ++++++++++++++++++-----------
 8 files changed, 645 insertions(+), 180 deletions(-)

diff --git a/nomad/heartbeat.go b/nomad/heartbeat.go
index e6dbb655f..e76b39541 100644
--- a/nomad/heartbeat.go
+++ b/nomad/heartbeat.go
@@ -167,7 +167,6 @@ func (h *nodeHeartbeater) invalidateHeartbeat(id string) {
 	if canDisconnect && hasPendingReconnects {
 		req.Status = structs.NodeStatusDisconnected
 	}
-
 	var resp structs.NodeUpdateResponse
 	if err := h.staticEndpoints.Node.UpdateStatus(&req, &resp); err != nil {
 		h.logger.Error("update node status failed", "error", err)
@@ -181,8 +180,8 @@ func (h *nodeHeartbeater) disconnectState(id string) (bool, bool) {
 		return false, false
 	}
 
-	// Exit if this is the node already in a state other than ready.
-	if node.Status != structs.NodeStatusReady {
+	// Exit if the node is already down or just initializing.
+	if node.Status == structs.NodeStatusDown || node.Status == structs.NodeStatusInit {
 		return false, false
 	}
 
diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go
index fc3aa81cc..f1dbb04df 100644
--- a/nomad/node_endpoint.go
+++ b/nomad/node_endpoint.go
@@ -573,7 +573,8 @@ func nodeStatusTransitionRequiresEval(newStatus, oldStatus string) bool {
 	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
 	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
 	disconnectedToOther := oldStatus == structs.NodeStatusDisconnected && newStatus != structs.NodeStatusDisconnected
-	return initToReady || terminalToReady || disconnectedToOther
+	otherToDisconnected := oldStatus != structs.NodeStatusDisconnected && newStatus == structs.NodeStatusDisconnected
+	return initToReady || terminalToReady || disconnectedToOther || otherToDisconnected
 }
 
 // UpdateDrain is used to update the drain mode of a client node
diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go
index 555f235cd..c86e854a4 100644
--- a/nomad/plan_apply.go
+++ b/nomad/plan_apply.go
@@ -705,10 +705,6 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri
 // The plan is only valid for disconnected nodes if it only contains
 // updates to mark allocations as unknown.
 func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool {
-	if len(plan.NodeUpdate[nodeID]) != 0 || len(plan.NodePreemptions[nodeID]) != 0 {
-		return false
-	}
-
 	for _, alloc := range plan.NodeAllocation[nodeID] {
 		if alloc.ClientStatus != structs.AllocClientStatusUnknown {
 			return false
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index ae8a87b5c..3a49f1322 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -11305,8 +11305,6 @@ func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string)
 
 // AppendUnknownAlloc marks an allocation as unknown.
 func (p *Plan) AppendUnknownAlloc(alloc *Allocation) {
-	// Strip the job as it's set once on the ApplyPlanResultRequest.
-	alloc.Job = nil
 	// Strip the resources as they can be rebuilt.
 	alloc.Resources = nil
 
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index d363912bb..cfdc0996d 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -344,12 +344,13 @@ func (a *allocReconciler) handleStop(m allocMatrix) {
 // filterAndStopAll stops all allocations in an allocSet. This is useful in when
 // stopping an entire job or task group.
 func (a *allocReconciler) filterAndStopAll(set allocSet) uint64 {
-	untainted, migrate, lost, disconnecting, reconnecting, _ := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
+	untainted, migrate, lost, disconnecting, reconnecting, ignore := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
 	a.markStop(untainted, "", allocNotNeeded)
 	a.markStop(migrate, "", allocNotNeeded)
 	a.markStop(lost, structs.AllocClientStatusLost, allocLost)
 	a.markStop(disconnecting, "", allocNotNeeded)
 	a.markStop(reconnecting, "", allocNotNeeded)
+	a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded)
 	return uint64(len(set))
 }
 
@@ -462,7 +463,6 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 	if isCanarying {
 		untainted = untainted.difference(canaries)
 	}
-
 	requiresCanaries := a.requiresCanaries(tg, dstate, destructive, canaries)
 	if requiresCanaries {
 		a.computeCanaries(tg, dstate, destructive, canaries, desiredChanges, nameIndex)
@@ -619,6 +619,9 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang
 
 		canaries = all.fromKeys(canaryIDs)
 		untainted, migrate, lost, _, _, _ := canaries.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
+		// We don't add these stops to desiredChanges because the deployment is
+		// still active. DesiredChanges is used to report deployment progress/final
+		// state. These transient failures aren't meaningful.
 		a.markStop(migrate, "", allocMigrating)
 		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
 
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index 783970042..3cb29cb90 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -5549,6 +5549,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 					alloc.ClientStatus = tc.disconnectedAllocStatus
 					// Set the node id on all the disconnected allocs to the node under test.
 					alloc.NodeID = testNode.ID
+					alloc.NodeName = "disconnected"
 
 					alloc.AllocStates = []*structs.AllocState{{
 						Field: structs.AllocStateFieldClientStatus,
@@ -5631,3 +5632,366 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 		})
 	}
 }
+
+// Tests that a client disconnect while a canary is in progress generates the result.
+func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
+
+	type testCase struct {
+		name            string
+		nodes           []string
+		deploymentState *structs.DeploymentState
+		deployedAllocs  map[*structs.Node][]*structs.Allocation
+		canaryAllocs    map[*structs.Node][]*structs.Allocation
+		expectedResult  *resultExpectation
+	}
+
+	running := structs.AllocClientStatusRunning
+	complete := structs.AllocClientStatusComplete
+	unknown := structs.AllocClientStatusUnknown
+	pending := structs.AllocClientStatusPending
+	run := structs.AllocDesiredStatusRun
+	stop := structs.AllocDesiredStatusStop
+
+	maxClientDisconnect := 10 * time.Minute
+
+	readyNode := mock.Node()
+	readyNode.Name = "ready-" + readyNode.ID
+	readyNode.Status = structs.NodeStatusReady
+
+	disconnectedNode := mock.Node()
+	disconnectedNode.Name = "disconnected-" + disconnectedNode.ID
+	disconnectedNode.Status = structs.NodeStatusDisconnected
+
+	// Job with allocations and max_client_disconnect
+	job := mock.Job()
+
+	updatedJob := job.Copy()
+	updatedJob.Version = updatedJob.Version + 1
+
+	testCases := []testCase{
+		{
+			name: "3-placed-1-disconnect",
+			deploymentState: &structs.DeploymentState{
+				AutoRevert:        false,
+				AutoPromote:       false,
+				Promoted:          false,
+				ProgressDeadline:  5 * time.Minute,
+				RequireProgressBy: time.Now().Add(5 * time.Minute),
+				PlacedCanaries:    []string{},
+				DesiredCanaries:   1,
+				DesiredTotal:      6,
+				PlacedAllocs:      3,
+				HealthyAllocs:     2,
+				UnhealthyAllocs:   0,
+			},
+			deployedAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// filtered as terminal
+					{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
+					// Ignored
+					{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
+					// destructive, but discarded because canarying
+					{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// filtered as terminal
+					{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
+				},
+			},
+			canaryAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// Ignored
+					{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
+					// Ignored
+					{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[1]", ClientStatus: running, DesiredStatus: run},
+				},
+			},
+			expectedResult: &resultExpectation{
+				createDeployment:  nil,
+				deploymentUpdates: nil,
+				place:             3,
+				destructive:       0,
+				stop:              0,
+				inplace:           0,
+				attributeUpdates:  0,
+				disconnectUpdates: 3,
+				reconnectUpdates:  0,
+				desiredTGUpdates: map[string]*structs.DesiredUpdates{
+					updatedJob.TaskGroups[0].Name: {
+						Place:  3,
+						Canary: 0,
+						Ignore: 3,
+					},
+				},
+			},
+		},
+		{
+			name: "ignore-unknown",
+			deploymentState: &structs.DeploymentState{
+				AutoRevert:        false,
+				AutoPromote:       false,
+				Promoted:          false,
+				ProgressDeadline:  5 * time.Minute,
+				RequireProgressBy: time.Now().Add(5 * time.Minute),
+				PlacedCanaries:    []string{},
+				DesiredCanaries:   1,
+				DesiredTotal:      6,
+				PlacedAllocs:      3,
+				HealthyAllocs:     2,
+				UnhealthyAllocs:   0,
+			},
+			deployedAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// filtered as terminal
+					{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
+					// Ignored
+					{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
+					// destructive, but discarded because canarying
+					{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// filtered as terminal
+					{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
+				},
+			},
+			canaryAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// Ignored
+					{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
+					// Ignored
+					{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// Ignored
+					{Name: "my-job.web[1]", ClientStatus: unknown, DesiredStatus: run},
+				},
+			},
+			expectedResult: &resultExpectation{
+				createDeployment:  nil,
+				deploymentUpdates: nil,
+				place:             2,
+				destructive:       0,
+				stop:              0,
+				inplace:           0,
+				attributeUpdates:  0,
+				disconnectUpdates: 2,
+				reconnectUpdates:  0,
+				desiredTGUpdates: map[string]*structs.DesiredUpdates{
+					updatedJob.TaskGroups[0].Name: {
+						Place:  2,
+						Canary: 0,
+						Ignore: 4,
+					},
+				},
+			},
+		},
+		{
+			name: "4-placed-2-pending-lost",
+			deploymentState: &structs.DeploymentState{
+				AutoRevert:        false,
+				AutoPromote:       false,
+				Promoted:          false,
+				ProgressDeadline:  5 * time.Minute,
+				RequireProgressBy: time.Now().Add(5 * time.Minute),
+				PlacedCanaries:    []string{},
+				DesiredCanaries:   2,
+				DesiredTotal:      6,
+				PlacedAllocs:      4,
+				HealthyAllocs:     2,
+				UnhealthyAllocs:   0,
+			},
+			deployedAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// filtered as terminal
+					{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
+					// filtered as terminal
+					{Name: "my-job.web[2]", ClientStatus: complete, DesiredStatus: stop},
+					// destructive, but discarded because canarying
+					{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// filtered as terminal
+					{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
+					// Gets a placement, and a disconnect update
+					{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
+				},
+			},
+			canaryAllocs: map[*structs.Node][]*structs.Allocation{
+				readyNode: {
+					// Ignored
+					{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
+					// Ignored
+					{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: run},
+				},
+				disconnectedNode: {
+					// Stop/Lost because pending
+					{Name: "my-job.web[1]", ClientStatus: pending, DesiredStatus: run},
+					// Stop/Lost because pending
+					{Name: "my-job.web[3]", ClientStatus: pending, DesiredStatus: run},
+				},
+			},
+			expectedResult: &resultExpectation{
+				createDeployment:  nil,
+				deploymentUpdates: nil,
+				place:             2,
+				destructive:       0,
+				stop:              2,
+				inplace:           0,
+				attributeUpdates:  0,
+				disconnectUpdates: 2,
+				reconnectUpdates:  0,
+				desiredTGUpdates: map[string]*structs.DesiredUpdates{
+					updatedJob.TaskGroups[0].Name: {
+						Place:  2,
+						Canary: 0,
+						Ignore: 3,
+						// The 2 stops in this test are transient failures, but
+						// the deployment can still progress. We don't include
+						// them in the stop count since DesiredTGUpdates is used
+						// to report deployment progress or final deployment state.
+						Stop: 0,
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Set the count dynamically to the number from the original deployment.
+			job.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
+			job.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
+			job.TaskGroups[0].Update = &structs.UpdateStrategy{
+				MaxParallel:     1,
+				Canary:          tc.deploymentState.DesiredCanaries,
+				MinHealthyTime:  3 * time.Second,
+				HealthyDeadline: 20 * time.Second,
+				AutoRevert:      true,
+				AutoPromote:     true,
+			}
+
+			updatedJob.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
+			updatedJob.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
+			updatedJob.TaskGroups[0].Update = &structs.UpdateStrategy{
+				MaxParallel:     1,
+				Canary:          tc.deploymentState.DesiredCanaries,
+				MinHealthyTime:  3 * time.Second,
+				HealthyDeadline: 20 * time.Second,
+				AutoRevert:      true,
+				AutoPromote:     true,
+			}
+
+			// Populate Alloc IDS, Node IDs, Job on deployed allocs
+			allocsConfigured := 0
+			for node, allocs := range tc.deployedAllocs {
+				for _, alloc := range allocs {
+					alloc.ID = uuid.Generate()
+					alloc.NodeID = node.ID
+					alloc.NodeName = node.Name
+					alloc.JobID = job.ID
+					alloc.Job = job
+					alloc.TaskGroup = job.TaskGroups[0].Name
+					allocsConfigured++
+				}
+			}
+
+			require.Equal(t, tc.deploymentState.DesiredTotal, allocsConfigured, "invalid alloc configuration: expect %d got %d", tc.deploymentState.DesiredTotal, allocsConfigured)
+
+			// Populate Alloc IDS, Node IDs, Job on canaries
+			canariesConfigured := 0
+			handled := make(map[string]allocUpdateType)
+			for node, allocs := range tc.canaryAllocs {
+				for _, alloc := range allocs {
+					alloc.ID = uuid.Generate()
+					alloc.NodeID = node.ID
+					alloc.NodeName = node.Name
+					alloc.JobID = updatedJob.ID
+					alloc.Job = updatedJob
+					alloc.TaskGroup = updatedJob.TaskGroups[0].Name
+					alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
+						Canary: true,
+					}
+					if alloc.ClientStatus == structs.AllocClientStatusRunning {
+						alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
+					}
+					tc.deploymentState.PlacedCanaries = append(tc.deploymentState.PlacedCanaries, alloc.ID)
+					handled[alloc.ID] = allocUpdateFnIgnore
+					canariesConfigured++
+				}
+			}
+
+			// Validate tc.canaryAllocs against tc.deploymentState
+			require.Equal(t, tc.deploymentState.PlacedAllocs, canariesConfigured, "invalid canary configuration: expect %d got %d", tc.deploymentState.PlacedAllocs, canariesConfigured)
+
+			deployment := structs.NewDeployment(updatedJob, 50)
+			deployment.TaskGroups[updatedJob.TaskGroups[0].Name] = tc.deploymentState
+
+			// Build a map of tainted nodes that contains the last canary
+			tainted := make(map[string]*structs.Node, 1)
+			tainted[disconnectedNode.ID] = disconnectedNode
+
+			allocs := make([]*structs.Allocation, 0)
+
+			allocs = append(allocs, tc.deployedAllocs[readyNode]...)
+			allocs = append(allocs, tc.deployedAllocs[disconnectedNode]...)
+			allocs = append(allocs, tc.canaryAllocs[readyNode]...)
+			allocs = append(allocs, tc.canaryAllocs[disconnectedNode]...)
+
+			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
+			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob,
+				deployment, allocs, tainted, "", 50, true)
+			result := reconciler.Compute()
+
+			// Assert the correct results
+			assertResults(t, result, tc.expectedResult)
+
+			// Validate that placements are either for placed canaries for the
+			// updated job, or for disconnected allocs for the original job
+			// and that they have a disconnect update.
+			for _, placeResult := range result.place {
+				found := false
+				require.NotNil(t, placeResult.previousAlloc)
+				for _, deployed := range tc.deployedAllocs[disconnectedNode] {
+					if deployed.ID == placeResult.previousAlloc.ID {
+						found = true
+						require.Equal(t, job.Version, placeResult.previousAlloc.Job.Version)
+						require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
+						_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
+						require.True(t, exists)
+						break
+					}
+				}
+				for _, canary := range tc.canaryAllocs[disconnectedNode] {
+					if canary.ID == placeResult.previousAlloc.ID {
+						found = true
+						require.Equal(t, updatedJob.Version, placeResult.previousAlloc.Job.Version)
+						require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
+						_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
+						require.True(t, exists)
+						break
+					}
+				}
+				require.True(t, found)
+			}
+
+			// Validate that stops are for pending disconnects
+			for _, stopResult := range result.stop {
+				require.Equal(t, pending, stopResult.alloc.ClientStatus)
+			}
+		})
+	}
+}
diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go
index 395998ff2..4d0cfb7a8 100644
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@@ -225,7 +225,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 	ignore = make(map[string]*structs.Allocation)
 
 	for _, alloc := range a {
-
 		// make sure we don't apply any reconnect logic to task groups
 		// without max_client_disconnect
 		supportsDisconnectedClients := alloc.SupportsDisconnectedClients(serverSupportsDisconnectedClients)
@@ -251,6 +250,40 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}
 
+		taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID]
+		if taintedNode != nil {
+			// Group disconnecting/reconnecting
+			switch taintedNode.Status {
+			case structs.NodeStatusDisconnected:
+				if supportsDisconnectedClients {
+					// Filter running allocs on a node that is disconnected to be marked as unknown.
+					if alloc.ClientStatus == structs.AllocClientStatusRunning {
+						disconnecting[alloc.ID] = alloc
+						continue
+					}
+					// Filter pending allocs on a node that is disconnected to be marked as lost.
+					if alloc.ClientStatus == structs.AllocClientStatusPending {
+						lost[alloc.ID] = alloc
+						continue
+					}
+				} else {
+					lost[alloc.ID] = alloc
+					continue
+				}
+			case structs.NodeStatusReady:
+				// Filter reconnecting allocs on a node that is now connected.
+				if reconnected {
+					if expired {
+						lost[alloc.ID] = alloc
+						continue
+					}
+					reconnecting[alloc.ID] = alloc
+					continue
+				}
+			default:
+			}
+		}
+
 		// Terminal allocs, if not reconnected, are always untainted as they
 		// should never be migrated.
 		if alloc.TerminalStatus() && !reconnected {
@@ -287,8 +320,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}
 
-		taintedNode, ok := taintedNodes[alloc.NodeID]
-		if !ok {
+		if !nodeIsTainted {
 			// Filter allocs on a node that is now re-connected to be resumed.
 			if reconnected {
 				if expired {
@@ -304,39 +336,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}
 
-		if taintedNode != nil {
-			// Group disconnecting/reconnecting
-			switch taintedNode.Status {
-			case structs.NodeStatusDisconnected:
-				if supportsDisconnectedClients {
-					// Filter running allocs on a node that is disconnected to be marked as unknown.
-					if alloc.ClientStatus == structs.AllocClientStatusRunning {
-						disconnecting[alloc.ID] = alloc
-						continue
-					}
-					// Filter pending allocs on a node that is disconnected to be marked as lost.
-					if alloc.ClientStatus == structs.AllocClientStatusPending {
-						lost[alloc.ID] = alloc
-						continue
-					}
-				} else {
-					lost[alloc.ID] = alloc
-					continue
-				}
-			case structs.NodeStatusReady:
-				// Filter reconnecting allocs with replacements on a node that is now connected.
-				if reconnected {
-					if expired {
-						lost[alloc.ID] = alloc
-						continue
-					}
-					reconnecting[alloc.ID] = alloc
-					continue
-				}
-			default:
-			}
-		}
-
 		// Allocs on GC'd (nil) or lost nodes are Lost
 		if taintedNode == nil || taintedNode.TerminalStatus() {
 			lost[alloc.ID] = alloc
@@ -361,6 +360,12 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time
 	// When filtering disconnected sets, the untainted set is never populated.
 	// It has no purpose in that context.
 	for _, alloc := range a {
+		// Ignore disconnecting allocs that are already unknown. This can happen
+		// in the case of canaries that are interrupted by a disconnect.
+		if isDisconnecting && alloc.ClientStatus == structs.AllocClientStatusUnknown {
+			continue
+		}
+
 		var eligibleNow, eligibleLater bool
 		var rescheduleTime time.Time
 
@@ -557,6 +562,18 @@ func (a allocSet) delayByMaxClientDisconnect(now time.Time) (later []*delayedRes
 	return
 }
 
+// filterByClientStatus returns allocs from the set with the specified client status.
+func (a allocSet) filterByClientStatus(clientStatus string) allocSet {
+	allocs := make(allocSet)
+	for _, alloc := range a {
+		if alloc.ClientStatus == clientStatus {
+			allocs[alloc.ID] = alloc
+		}
+	}
+
+	return allocs
+}
+
 // allocNameIndex is used to select allocation names for placement or removal
 // given an existing set of placed allocations.
 type allocNameIndex struct {
diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go
index 0b2dc0331..12d1c6b09 100644
--- a/scheduler/reconcile_util_test.go
+++ b/scheduler/reconcile_util_test.go
@@ -266,23 +266,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			all: allocSet{
 				// Non-terminal allocs on disconnected nodes w/o max-disconnect are lost
 				"lost-running": {
-					ID:           "lost-running",
-					Name:         "lost-running",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJobNoMaxDisconnect,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
-				},
-				// Unknown allocs on disconnected nodes w/o max-disconnect are lost
-				"lost-unknown": {
-					ID:            "lost-unknown",
-					Name:          "lost-unknown",
-					ClientStatus:  structs.AllocClientStatusUnknown,
+					ID:            "lost-running",
+					Name:          "lost-running",
+					ClientStatus:  structs.AllocClientStatusRunning,
 					DesiredStatus: structs.AllocDesiredStatusRun,
 					Job:           testJobNoMaxDisconnect,
 					NodeID:        "disconnected",
 					TaskGroup:     "web",
-					AllocStates:   unknownAllocState,
 				},
 			},
 			untainted:     allocSet{},
@@ -292,22 +282,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			ignore:        allocSet{},
 			lost: allocSet{
 				"lost-running": {
-					ID:           "lost-running",
-					Name:         "lost-running",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJobNoMaxDisconnect,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
-				},
-				"lost-unknown": {
-					ID:            "lost-unknown",
-					Name:          "lost-unknown",
-					ClientStatus:  structs.AllocClientStatusUnknown,
+					ID:            "lost-running",
+					Name:          "lost-running",
+					ClientStatus:  structs.AllocClientStatusRunning,
 					DesiredStatus: structs.AllocDesiredStatusRun,
 					Job:           testJobNoMaxDisconnect,
 					NodeID:        "disconnected",
 					TaskGroup:     "web",
-					AllocStates:   unknownAllocState,
 				},
 			},
 		},
@@ -324,6 +305,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "running-replacement",
 					Name:               "web",
 					ClientStatus:       structs.AllocClientStatusRunning,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -349,6 +331,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "running-replacement",
 					Name:               "web",
 					ClientStatus:       structs.AllocClientStatusRunning,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -384,14 +367,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				// Node.UpdateStatus has already handled syncing client state so this
 				// should be a noop.
 				"reconnecting-running-no-replacement": {
-					ID:           "reconnecting-running-no-replacement",
-					Name:         "web",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "reconnecting-running-no-replacement",
+					Name:          "web",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 			},
 			untainted:     allocSet{},
@@ -399,14 +383,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			disconnecting: allocSet{},
 			reconnecting: allocSet{
 				"reconnecting-running-no-replacement": {
-					ID:           "reconnecting-running-no-replacement",
-					Name:         "web",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "reconnecting-running-no-replacement",
+					Name:          "web",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 			},
 			ignore: allocSet{},
@@ -421,43 +406,47 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			all: allocSet{
 				// Allocs on reconnected nodes that are complete are untainted
 				"untainted-reconnect-complete": {
-					ID:           "untainted-reconnect-complete",
-					Name:         "untainted-reconnect-complete",
-					ClientStatus: structs.AllocClientStatusComplete,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "untainted-reconnect-complete",
+					Name:          "untainted-reconnect-complete",
+					ClientStatus:  structs.AllocClientStatusComplete,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 				// Failed allocs on reconnected nodes are in reconnecting so that
 				// they be marked with desired status stop at the server.
 				"reconnecting-failed": {
-					ID:           "reconnecting-failed",
-					Name:         "reconnecting-failed",
-					ClientStatus: structs.AllocClientStatusFailed,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "reconnecting-failed",
+					Name:          "reconnecting-failed",
+					ClientStatus:  structs.AllocClientStatusFailed,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 				// Lost allocs on reconnected nodes don't get restarted
 				"untainted-reconnect-lost": {
-					ID:           "untainted-reconnect-lost",
-					Name:         "untainted-reconnect-lost",
-					ClientStatus: structs.AllocClientStatusLost,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "untainted-reconnect-lost",
+					Name:          "untainted-reconnect-lost",
+					ClientStatus:  structs.AllocClientStatusLost,
+					DesiredStatus: structs.AllocDesiredStatusStop,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 				// Replacement allocs that are complete are untainted
 				"untainted-reconnect-complete-replacement": {
 					ID:                 "untainted-reconnect-complete-replacement",
 					Name:               "untainted-reconnect-complete",
 					ClientStatus:       structs.AllocClientStatusComplete,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -469,6 +458,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "untainted-reconnect-failed-replacement",
 					Name:               "untainted-reconnect-failed",
 					ClientStatus:       structs.AllocClientStatusFailed,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -480,6 +470,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "untainted-reconnect-lost-replacement",
 					Name:               "untainted-reconnect-lost",
 					ClientStatus:       structs.AllocClientStatusLost,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -489,29 +480,32 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			},
 			untainted: allocSet{
 				"untainted-reconnect-complete": {
-					ID:           "untainted-reconnect-complete",
-					Name:         "untainted-reconnect-complete",
-					ClientStatus: structs.AllocClientStatusComplete,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "untainted-reconnect-complete",
+					Name:          "untainted-reconnect-complete",
+					ClientStatus:  structs.AllocClientStatusComplete,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 				"untainted-reconnect-lost": {
-					ID:           "untainted-reconnect-lost",
-					Name:         "untainted-reconnect-lost",
-					ClientStatus: structs.AllocClientStatusLost,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "untainted-reconnect-lost",
+					Name:          "untainted-reconnect-lost",
+					ClientStatus:  structs.AllocClientStatusLost,
+					DesiredStatus: structs.AllocDesiredStatusStop,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 				"untainted-reconnect-complete-replacement": {
 					ID:                 "untainted-reconnect-complete-replacement",
 					Name:               "untainted-reconnect-complete",
 					ClientStatus:       structs.AllocClientStatusComplete,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -522,6 +516,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "untainted-reconnect-failed-replacement",
 					Name:               "untainted-reconnect-failed",
 					ClientStatus:       structs.AllocClientStatusFailed,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -532,6 +527,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "untainted-reconnect-lost-replacement",
 					Name:               "untainted-reconnect-lost",
 					ClientStatus:       structs.AllocClientStatusLost,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -543,14 +539,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			disconnecting: allocSet{},
 			reconnecting: allocSet{
 				"reconnecting-failed": {
-					ID:           "reconnecting-failed",
-					Name:         "reconnecting-failed",
-					ClientStatus: structs.AllocClientStatusFailed,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "reconnecting-failed",
+					Name:          "reconnecting-failed",
+					ClientStatus:  structs.AllocClientStatusFailed,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 			},
 			ignore: allocSet{},
@@ -565,12 +562,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			all: allocSet{
 				// Non-terminal allocs on disconnected nodes are disconnecting
 				"disconnect-running": {
-					ID:           "disconnect-running",
-					Name:         "disconnect-running",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
+					ID:            "disconnect-running",
+					Name:          "disconnect-running",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
 				},
 				// Unknown allocs on disconnected nodes are ignored
 				"ignore-unknown": {
@@ -585,13 +583,37 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				},
 				// Unknown allocs on disconnected nodes are lost when expired
 				"lost-unknown": {
-					ID:           "lost-unknown",
-					Name:         "lost-unknown",
-					ClientStatus: structs.AllocClientStatusUnknown,
-					Job:          testJob,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
-					AllocStates:  expiredAllocState,
+					ID:            "lost-unknown",
+					Name:          "lost-unknown",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
+					AllocStates:   expiredAllocState,
+				},
+				// Pending allocs on disconnected nodes are lost
+				"lost-pending": {
+					ID:            "lost-pending",
+					Name:          "lost-pending",
+					ClientStatus:  structs.AllocClientStatusPending,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
+				},
+				// Expired allocs on reconnected clients are lost
+				// Pending allocs on disconnected nodes are lost
+				"lost-expired": {
+					ID:            "lost-expired",
+					Name:          "lost-expired",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					TaskStates:    reconnectTaskState,
+					AllocStates:   expiredAllocState,
 				},
 				// Failed and stopped allocs on disconnected nodes are ignored
 				"ignore-reconnected-failed-stopped": {
@@ -603,19 +625,20 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					NodeID:        "disconnected",
 					TaskGroup:     "web",
 					TaskStates:    reconnectTaskState,
-					AllocStates:   unknownAllocState, // TODO really?
+					AllocStates:   unknownAllocState,
 				},
 			},
 			untainted: allocSet{},
 			migrate:   allocSet{},
 			disconnecting: allocSet{
 				"disconnect-running": {
-					ID:           "disconnect-running",
-					Name:         "disconnect-running",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
+					ID:            "disconnect-running",
+					Name:          "disconnect-running",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
 				},
 			},
 			reconnecting: allocSet{},
@@ -645,13 +668,73 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			},
 			lost: allocSet{
 				"lost-unknown": {
-					ID:           "lost-unknown",
-					Name:         "lost-unknown",
-					ClientStatus: structs.AllocClientStatusUnknown,
-					Job:          testJob,
-					NodeID:       "disconnected",
-					TaskGroup:    "web",
-					AllocStates:  expiredAllocState,
+					ID:            "lost-unknown",
+					Name:          "lost-unknown",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
+					AllocStates:   expiredAllocState,
+				},
+				"lost-pending": {
+					ID:            "lost-pending",
+					Name:          "lost-pending",
+					ClientStatus:  structs.AllocClientStatusPending,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
+				},
+				"lost-expired": {
+					ID:            "lost-expired",
+					Name:          "lost-expired",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					TaskStates:    reconnectTaskState,
+					AllocStates:   expiredAllocState,
+				},
+			},
+		},
+		{
+			name:                        "disco-client-reconnect",
+			supportsDisconnectedClients: true,
+			now:                         time.Now(),
+			taintedNodes:                nodes,
+			skipNilNodeTest:             false,
+			all: allocSet{
+				// Expired allocs on reconnected clients are lost
+				"lost-expired-reconnect": {
+					ID:            "lost-expired-reconnect",
+					Name:          "lost-expired-reconnect",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					TaskStates:    reconnectTaskState,
+					AllocStates:   expiredAllocState,
+				},
+			},
+			untainted:     allocSet{},
+			migrate:       allocSet{},
+			disconnecting: allocSet{},
+			reconnecting:  allocSet{},
+			ignore:        allocSet{},
+			lost: allocSet{
+				"lost-expired-reconnect": {
+					ID:            "lost-expired-reconnect",
+					Name:          "lost-expired-reconnect",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					TaskStates:    reconnectTaskState,
+					AllocStates:   expiredAllocState,
 				},
 			},
 		},
@@ -666,6 +749,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "running-replacement",
 					Name:               "web",
 					ClientStatus:       structs.AllocClientStatusRunning,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -673,14 +757,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				},
 				// Running and replaced allocs on reconnected nodes are reconnecting
 				"running-original": {
-					ID:           "running-original",
-					Name:         "web",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "running-original",
+					Name:          "web",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 			},
 			untainted: allocSet{
@@ -688,6 +773,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					ID:                 "running-replacement",
 					Name:               "web",
 					ClientStatus:       structs.AllocClientStatusRunning,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
 					NodeID:             "normal",
 					TaskGroup:          "web",
@@ -698,14 +784,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			disconnecting: allocSet{},
 			reconnecting: allocSet{
 				"running-original": {
-					ID:           "running-original",
-					Name:         "web",
-					ClientStatus: structs.AllocClientStatusRunning,
-					Job:          testJob,
-					NodeID:       "normal",
-					TaskGroup:    "web",
-					AllocStates:  unknownAllocState,
-					TaskStates:   reconnectTaskState,
+					ID:            "running-original",
+					Name:          "web",
+					ClientStatus:  structs.AllocClientStatusRunning,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+					TaskStates:    reconnectTaskState,
 				},
 			},
 			ignore: allocSet{},