backport of commit e8efe2d251bf3628f13c7eb3ce2422eb7e5b85f6 (#18884)

Co-authored-by: Juana De La Cuesta <juanita.delacuestamorales@hashicorp.com>
2023-10-27 10:20:53 -05:00 · 2023-10-27 10:20:53 -05:00 · 50c9af53b7
parent c21331bc21
commit 50c9af53b7
7 changed files with 560 additions and 260 deletions
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@ -134,6 +134,7 @@ const (
 )

 const (
+
 	// SystemInitializationType is used for messages that initialize parts of
 	// the system, such as the state store. These messages are not included in
 	// the event stream.
@ -10730,6 +10731,12 @@ func (a *Allocation) MigrateStrategy() *MigrateStrategy {
 func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
 	failTime := a.LastEventTime()
 	reschedulePolicy := a.ReschedulePolicy()
+
+	//If reschedule is disabled, return early
+	if reschedulePolicy.Attempts == 0 && !reschedulePolicy.Unlimited {
+		return time.Time{}, false
+	}
+
 	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
 		return time.Time{}, false
 	}
@ -10749,16 +10756,16 @@ func (a *Allocation) nextRescheduleTime(failTime time.Time, reschedulePolicy *Re
 	return nextRescheduleTime, rescheduleEligible
 }

-// NextRescheduleTimeByFailTime works like NextRescheduleTime but allows callers
+// NextRescheduleTimeByTime works like NextRescheduleTime but allows callers
 // specify a failure time. Useful for things like determining whether to reschedule
 // an alloc on a disconnected node.
-func (a *Allocation) NextRescheduleTimeByFailTime(failTime time.Time) (time.Time, bool) {
+func (a *Allocation) NextRescheduleTimeByTime(t time.Time) (time.Time, bool) {
 	reschedulePolicy := a.ReschedulePolicy()
 	if reschedulePolicy == nil {
 		return time.Time{}, false
 	}

-	return a.nextRescheduleTime(failTime, reschedulePolicy)
+	return a.nextRescheduleTime(t, reschedulePolicy)
 }

 // ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration
@ -11098,7 +11105,7 @@ func (a *Allocation) Expired(now time.Time) bool {
 	}

 	expiry := lastUnknown.Add(*tg.MaxClientDisconnect)
-	return now.UTC().After(expiry) || now.UTC().Equal(expiry)
+	return expiry.Sub(now) <= 0
 }

 // LastUnknown returns the timestamp for the last time the allocation
--- a/nomad/worker.go
+++ b/nomad/worker.go
@ -786,7 +786,7 @@ SUBMIT:
 		}
 		return err
 	} else {
-		w.logger.Debug("created evaluation", "eval", log.Fmt("%#v", eval))
+		w.logger.Debug("created evaluation", "eval", log.Fmt("%#v", eval), "waitUntil", log.Fmt("%#v", eval.WaitUntil.String()))
 		w.backoffReset()
 	}
 	return nil
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@ -6944,6 +6944,7 @@ func TestServiceSched_Client_Disconnect_Creates_Updates_and_Evals(t *testing.T)
 		NodeID:      disconnectedNode.ID,
 		Status:      structs.EvalStatusPending,
 	}}
+
 	nodeStatusUpdateEval := evals[0]
 	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), evals))

@ -6953,16 +6954,21 @@ func TestServiceSched_Client_Disconnect_Creates_Updates_and_Evals(t *testing.T)
 	require.Equal(t, structs.EvalStatusComplete, h.Evals[0].Status)
 	require.Len(t, h.Plans, 1, "plan")

-	// One followup delayed eval created
-	require.Len(t, h.CreateEvals, 1)
-	followUpEval := h.CreateEvals[0]
-	require.Equal(t, nodeStatusUpdateEval.ID, followUpEval.PreviousEval)
-	require.Equal(t, "pending", followUpEval.Status)
-	require.NotEmpty(t, followUpEval.WaitUntil)
+	// Two followup delayed eval created
+	require.Len(t, h.CreateEvals, 2)
+	followUpEval1 := h.CreateEvals[0]
+	require.Equal(t, nodeStatusUpdateEval.ID, followUpEval1.PreviousEval)
+	require.Equal(t, "pending", followUpEval1.Status)
+	require.NotEmpty(t, followUpEval1.WaitUntil)

-	// Insert eval in the state store
+	followUpEval2 := h.CreateEvals[1]
+	require.Equal(t, nodeStatusUpdateEval.ID, followUpEval2.PreviousEval)
+	require.Equal(t, "pending", followUpEval2.Status)
+	require.NotEmpty(t, followUpEval2.WaitUntil)
+
+	// Insert eval1 in the state store
 	testutil.WaitForResult(func() (bool, error) {
-		found, err := h.State.EvalByID(nil, followUpEval.ID)
+		found, err := h.State.EvalByID(nil, followUpEval1.ID)
 		if err != nil {
 			return false, err
 		}
@ -6976,12 +6982,34 @@ func TestServiceSched_Client_Disconnect_Creates_Updates_and_Evals(t *testing.T)

 		return true, nil
 	}, func(err error) {
+
+		require.NoError(t, err)
+	})
+
+	// Insert eval2 in the state store
+	testutil.WaitForResult(func() (bool, error) {
+		found, err := h.State.EvalByID(nil, followUpEval2.ID)
+		if err != nil {
+			return false, err
+		}
+		if found == nil {
+			return false, nil
+		}
+
+		require.Equal(t, nodeStatusUpdateEval.ID, found.PreviousEval)
+		require.Equal(t, "pending", found.Status)
+		require.NotEmpty(t, found.WaitUntil)
+
+		return true, nil
+	}, func(err error) {
+
 		require.NoError(t, err)
 	})

 	// Validate that the ClientStatus updates are part of the plan.
 	require.Len(t, h.Plans[0].NodeAllocation[disconnectedNode.ID], count)
 	// Pending update should have unknown status.
+
 	for _, nodeAlloc := range h.Plans[0].NodeAllocation[disconnectedNode.ID] {
 		require.Equal(t, nodeAlloc.ClientStatus, structs.AllocClientStatusUnknown)
 	}
@ -6991,6 +7019,7 @@ func TestServiceSched_Client_Disconnect_Creates_Updates_and_Evals(t *testing.T)
 	require.NoError(t, err, "plan.NodeUpdate")

 	// Validate that the StateStore Upsert applied the ClientStatus we specified.
+
 	for _, alloc := range unknownAllocs {
 		alloc, err = h.State.AllocByID(nil, alloc.ID)
 		require.NoError(t, err)
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@ -230,6 +230,7 @@ func (a *allocReconciler) computeDeploymentComplete(m allocMatrix) bool {
 		groupComplete := a.computeGroup(group, as)
 		complete = complete && groupComplete
 	}
+
 	return complete
 }

@ -398,6 +399,7 @@ func (a *allocReconciler) markDelayed(allocs allocSet, clientStatus, statusDescr
 // computeGroup reconciles state for a particular task group. It returns whether
 // the deployment it is for is complete with regards to the task group.
 func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
+
 	// Create the desired update object for the group
 	desiredChanges := new(structs.DesiredUpdates)
 	a.result.desiredTGUpdates[groupName] = desiredChanges
@ -426,6 +428,9 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 	untainted, migrate, lost, disconnecting, reconnecting, ignore := all.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
 	desiredChanges.Ignore += uint64(len(ignore))

+	// Determine what set of terminal allocations need to be rescheduled
+	untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, false, a.now, a.evalID, a.deployment)
+
 	// If there are allocations reconnecting we need to reconcile them and
 	// their replacements first because there is specific logic when deciding
 	// which ones to keep that can only be applied when the client reconnects.
@ -454,20 +459,28 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 		untainted = untainted.union(reconnect)
 	}

-	// Determine what set of terminal allocations need to be rescheduled
-	untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, false, a.now, a.evalID, a.deployment)
+	// Determine what set of disconnecting allocations need to be rescheduled now,
+	// which ones later and which ones can't be rescheduled at all.
+	timeoutLaterEvals := map[string]string{}
+	if len(disconnecting) > 0 {
+		untaintedDisconnecting, rescheduleDisconnecting, laterDisconnecting := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment)

-	// Determine what set of disconnecting allocations need to be rescheduled
-	_, rescheduleDisconnecting, _ := disconnecting.filterByRescheduleable(a.batch, true, a.now, a.evalID, a.deployment)
-	rescheduleNow = rescheduleNow.union(rescheduleDisconnecting)
+		rescheduleNow = rescheduleNow.union(rescheduleDisconnecting)
+		untainted = untainted.union(untaintedDisconnecting)
+		rescheduleLater = append(rescheduleLater, laterDisconnecting...)
+
+		// Find delays for any disconnecting allocs that have max_client_disconnect,
+		// create followup evals, and update the ClientStatus to unknown.
+		timeoutLaterEvals = a.createTimeoutLaterEvals(disconnecting, tg.Name)
+	}

 	// Find delays for any lost allocs that have stop_after_client_disconnect
-	lostLater := lost.delayByStopAfterClientDisconnect()
-	lostLaterEvals := a.createLostLaterEvals(lostLater, tg.Name)
-
-	// Find delays for any disconnecting allocs that have max_client_disconnect,
-	// create followup evals, and update the ClientStatus to unknown.
-	timeoutLaterEvals := a.createTimeoutLaterEvals(disconnecting, tg.Name)
+	lostLaterEvals := map[string]string{}
+	lostLater := []*delayedRescheduleInfo{}
+	if len(lost) > 0 {
+		lostLater = lost.delayByStopAfterClientDisconnect()
+		lostLaterEvals = a.createLostLaterEvals(lostLater, tg.Name)
+	}

 	// Merge disconnecting with the stop_after_client_disconnect set into the
 	// lostLaterEvals so that computeStop can add them to the stop set.
@ -486,13 +499,15 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 	// include stopped allocations.
 	isCanarying := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
 	stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, isCanarying, lostLaterEvals)
+
 	desiredChanges.Stop += uint64(len(stop))
 	untainted = untainted.difference(stop)

 	// Do inplace upgrades where possible and capture the set of upgrades that
 	// need to be done destructively.
-	ignore, inplace, destructive := a.computeUpdates(tg, untainted)
-	desiredChanges.Ignore += uint64(len(ignore))
+	ignoreUpdates, inplace, destructive := a.computeUpdates(tg, untainted)
+
+	desiredChanges.Ignore += uint64(len(ignoreUpdates))
 	desiredChanges.InPlaceUpdate += uint64(len(inplace))
 	if !existingDeployment {
 		dstate.DesiredTotal += len(destructive) + len(inplace)
@ -796,7 +811,8 @@ func (a *allocReconciler) computeReplacements(deploymentPlaceReady bool, desired
 	// replacements based off that.
 	failed := make(allocSet)
 	for id, alloc := range rescheduleNow {
-		if _, ok := a.result.disconnectUpdates[id]; !ok {
+		_, ok := a.result.disconnectUpdates[id]
+		if !ok && alloc.ClientStatus != structs.AllocClientStatusUnknown {
 			failed[id] = alloc
 		}
 	}
@ -968,8 +984,11 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc
 		untainted = untainted.difference(canaries)
 	}

+	// Remove disconnected allocations so they won't be stopped
+	knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown)
+
 	// Hot path the nothing to do case
-	remove := len(untainted) + len(migrate) - group.Count
+	remove := len(knownUntainted) + len(migrate) - group.Count
 	if remove <= 0 {
 		return stop
 	}
@ -1072,7 +1091,7 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc
 //   - If the reconnecting allocation is to be stopped, its replacements may
 //     not be present in any of the returned sets. The rest of the reconciler
 //     logic will handle them.
-func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others allocSet) (allocSet, allocSet) {
+func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all allocSet) (allocSet, allocSet) {
 	stop := make(allocSet)
 	reconnect := make(allocSet)

@ -1111,14 +1130,11 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al

 		// Find replacement allocations and decide which one to stop. A
 		// reconnecting allocation may have multiple replacements.
-		for _, replacementAlloc := range others {
+		for _, replacementAlloc := range all {

 			// Skip allocations that are not a replacement of the one
-			// reconnecting. Replacement allocations have the same name but a
-			// higher CreateIndex and a different ID.
-			isReplacement := replacementAlloc.ID != reconnectingAlloc.ID &&
-				replacementAlloc.Name == reconnectingAlloc.Name &&
-				replacementAlloc.CreateIndex > reconnectingAlloc.CreateIndex
+			// reconnecting.
+			isReplacement := replacementAlloc.ID == reconnectingAlloc.NextAllocation

 			// Skip allocations that are server terminal.
 			// We don't want to replace a reconnecting allocation with one that
@ -1142,12 +1158,14 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, others al
 				}
 			} else {
 				// The reconnecting allocation is preferred, so stop this
-				// replacement.
-				stop[replacementAlloc.ID] = replacementAlloc
-				a.result.stop = append(a.result.stop, allocStopResult{
-					alloc:             replacementAlloc,
-					statusDescription: allocReconnected,
-				})
+				// replacement, but avoid re-stopping stopped allocs
+				if replacementAlloc.ClientStatus != structs.AllocClientStatusFailed {
+					stop[replacementAlloc.ID] = replacementAlloc
+					a.result.stop = append(a.result.stop, allocStopResult{
+						alloc:             replacementAlloc,
+						statusDescription: allocReconnected,
+					})
+				}
 			}
 		}
 	}
@ -1235,11 +1253,17 @@ func (a *allocReconciler) createRescheduleLaterEvals(rescheduleLater []*delayedR
 	allocIDToFollowupEvalID := a.createLostLaterEvals(rescheduleLater, tgName)

 	// Create updates that will be applied to the allocs to mark the FollowupEvalID
-	for allocID, evalID := range allocIDToFollowupEvalID {
-		existingAlloc := all[allocID]
+	for _, laterAlloc := range rescheduleLater {
+		existingAlloc := all[laterAlloc.alloc.ID]
 		updatedAlloc := existingAlloc.Copy()
-		updatedAlloc.FollowupEvalID = evalID
-		a.result.attributeUpdates[updatedAlloc.ID] = updatedAlloc
+		updatedAlloc.FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID]
+
+		// Can't updated an allocation that is disconnected
+		if _, ok := a.result.disconnectUpdates[laterAlloc.allocID]; !ok {
+			a.result.attributeUpdates[laterAlloc.allocID] = updatedAlloc
+		} else {
+			a.result.disconnectUpdates[laterAlloc.allocID].FollowupEvalID = allocIDToFollowupEvalID[laterAlloc.alloc.ID]
+		}
 	}
 }

@ -1355,8 +1379,8 @@ func (a *allocReconciler) createTimeoutLaterEvals(disconnecting allocSet, tgName
 	}

 	timeoutDelays, err := disconnecting.delayByMaxClientDisconnect(a.now)
-	if err != nil || len(timeoutDelays) != len(disconnecting) {
-		a.logger.Error("error computing disconnecting timeouts for task_group",
+	if err != nil {
+		a.logger.Error("error for task_group",
 			"task_group", tgName, "error", err)
 		return map[string]string{}
 	}
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@ -5327,7 +5327,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 		nodeScoreIncrement           float64
 		disconnectedAllocStatus      string
 		disconnectedAllocStates      []*structs.AllocState
-		serverDesiredStatus          string
 		isBatch                      bool
 		nodeStatusDisconnected       bool
 		replace                      bool
@ -5342,13 +5341,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {

 	testCases := []testCase{
 		{
-			name:                         "reconnect-original-no-replacement",
-			allocCount:                   2,
-			replace:                      false,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                    "reconnect-original-no-replacement",
+			allocCount:              2,
+			replace:                 false,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: false,
 			expected: &resultExpectation{
 				reconnectUpdates: 2,
@ -5360,13 +5359,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "resume-original-and-stop-replacement",
-			allocCount:                   3,
-			replace:                      true,
-			disconnectedAllocCount:       1,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                    "resume-original-and-stop-replacement",
+			allocCount:              3,
+			replace:                 true,
+			disconnectedAllocCount:  1,
+			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: false,
 			expected: &resultExpectation{
 				stop:             1,
@ -5380,13 +5379,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-original-with-lower-node-score",
-			allocCount:                   4,
-			replace:                      true,
-			disconnectedAllocCount:       1,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                    "stop-original-with-lower-node-score",
+			allocCount:              4,
+			replace:                 true,
+			disconnectedAllocCount:  1,
+			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			nodeScoreIncrement:           1,
 			expected: &resultExpectation{
@ -5400,13 +5399,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-original-failed-on-reconnect",
-			allocCount:                   4,
-			replace:                      true,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusFailed,
+			name:                    "stop-original-failed-on-reconnect",
+			allocCount:              4,
+			replace:                 true,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusFailed,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			expected: &resultExpectation{
 				stop: 2,
@ -5419,13 +5418,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "reschedule-original-failed-if-not-replaced",
-			allocCount:                   4,
-			replace:                      false,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusFailed,
+			name:                    "reschedule-original-failed-if-not-replaced",
+			allocCount:              4,
+			replace:                 false,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusFailed,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			expected: &resultExpectation{
 				stop:  2,
@ -5445,13 +5444,15 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			replace:                 false,
 			disconnectedAllocCount:  2,
 			disconnectedAllocStatus: structs.AllocClientStatusComplete,
+
 			disconnectedAllocStates: disconnectAllocState,
-			serverDesiredStatus:     structs.AllocDesiredStatusRun,
 			isBatch:                 true,
 			expected: &resultExpectation{
+				place: 2,
 				desiredTGUpdates: map[string]*structs.DesiredUpdates{
 					"web": {
 						Ignore: 2,
+						Place:  2,
 					},
 				},
 			},
@ -5463,15 +5464,14 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			failReplacement:         true,
 			disconnectedAllocCount:  2,
 			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates: disconnectAllocState,
-			serverDesiredStatus:     structs.AllocDesiredStatusRun,
 			expected: &resultExpectation{
 				reconnectUpdates: 2,
-				stop:             2,
+				stop:             0,
 				desiredTGUpdates: map[string]*structs.DesiredUpdates{
 					"web": {
-						Ignore: 3,
-						Stop:   2,
+						Ignore: 5,
 					},
 				},
 			},
@ -5483,8 +5483,8 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			disconnectReplacement:   true,
 			disconnectedAllocCount:  1,
 			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates: disconnectAllocState,
-			serverDesiredStatus:     structs.AllocDesiredStatusRun,
 			expected: &resultExpectation{
 				reconnectUpdates: 1,
 				stop:             1,
@ -5503,8 +5503,8 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			taintReplacement:        true,
 			disconnectedAllocCount:  2,
 			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates: disconnectAllocState,
-			serverDesiredStatus:     structs.AllocDesiredStatusRun,
 			expected: &resultExpectation{
 				reconnectUpdates: 2,
 				stop:             2,
@ -5517,13 +5517,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-original-alloc-with-old-job-version",
-			allocCount:                   5,
-			replace:                      true,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                    "stop-original-alloc-with-old-job-version",
+			allocCount:              5,
+			replace:                 true,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			jobVersionIncrement:          1,
 			expected: &resultExpectation{
@ -5537,13 +5537,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-original-alloc-with-old-job-version-reconnect-eval",
-			allocCount:                   5,
-			replace:                      true,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                    "stop-original-alloc-with-old-job-version-reconnect-eval",
+			allocCount:              5,
+			replace:                 true,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			jobVersionIncrement:          1,
 			expected: &resultExpectation{
@ -5557,35 +5557,36 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced",
-			allocCount:                   5,
-			replace:                      true,
-			failReplacement:              true,
-			replaceFailedReplacement:     true,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusRunning,
+			name:                     "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced",
+			allocCount:               5,
+			replace:                  true,
+			failReplacement:          true,
+			replaceFailedReplacement: true,
+			disconnectedAllocCount:   2,
+			disconnectedAllocStatus:  structs.AllocClientStatusRunning,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
-			shouldStopOnDisconnectedNode: true,
+			shouldStopOnDisconnectedNode: false,
 			jobVersionIncrement:          1,
 			expected: &resultExpectation{
-				stop: 2,
+				stop:             2,
+				reconnectUpdates: 2,
 				desiredTGUpdates: map[string]*structs.DesiredUpdates{
 					"web": {
 						Stop:   2,
-						Ignore: 5,
+						Ignore: 7,
 					},
 				},
 			},
 		},
 		{
-			name:                         "stop-original-pending-alloc-for-disconnected-node",
-			allocCount:                   2,
-			replace:                      true,
-			disconnectedAllocCount:       1,
-			disconnectedAllocStatus:      structs.AllocClientStatusPending,
+			name:                    "stop-original-pending-alloc-for-disconnected-node",
+			allocCount:              2,
+			replace:                 true,
+			disconnectedAllocCount:  1,
+			disconnectedAllocStatus: structs.AllocClientStatusPending,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			nodeStatusDisconnected:       true,
 			expected: &resultExpectation{
@ -5599,23 +5600,23 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			},
 		},
 		{
-			name:                         "stop-failed-original-and-failed-replacements-and-place-new",
-			allocCount:                   5,
-			replace:                      true,
-			failReplacement:              true,
-			disconnectedAllocCount:       2,
-			disconnectedAllocStatus:      structs.AllocClientStatusFailed,
+			name:                    "stop-failed-original-and-failed-replacements-and-place-new",
+			allocCount:              5,
+			replace:                 true,
+			failReplacement:         true,
+			disconnectedAllocCount:  2,
+			disconnectedAllocStatus: structs.AllocClientStatusFailed,
+
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			expected: &resultExpectation{
-				stop:  4,
+				stop:  2,
 				place: 2,
 				desiredTGUpdates: map[string]*structs.DesiredUpdates{
 					"web": {
-						Stop:   4,
+						Stop:   2,
 						Place:  2,
-						Ignore: 3,
+						Ignore: 5,
 					},
 				},
 			},
@ -5627,7 +5628,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			disconnectedAllocCount:       2,
 			disconnectedAllocStatus:      structs.AllocClientStatusUnknown,
 			disconnectedAllocStates:      disconnectAllocState,
-			serverDesiredStatus:          structs.AllocDesiredStatusRun,
 			shouldStopOnDisconnectedNode: true,
 			nodeStatusDisconnected:       true,
 			maxDisconnect:                pointer.Of(2 * time.Second),
@ -5648,7 +5648,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			disconnectedAllocCount:  2,
 			disconnectedAllocStatus: structs.AllocClientStatusRunning,
 			disconnectedAllocStates: []*structs.AllocState{},
-			serverDesiredStatus:     structs.AllocDesiredStatusRun,
 			nodeStatusDisconnected:  true,
 			expected: &resultExpectation{
 				place:             2,
@ -5687,7 +5686,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 			// Set alloc state
 			disconnectedAllocCount := tc.disconnectedAllocCount
 			for _, alloc := range allocs {
-				alloc.DesiredStatus = tc.serverDesiredStatus
+				alloc.DesiredStatus = structs.AllocDesiredStatusRun

 				if tc.maxDisconnect != nil {
 					alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect
@ -5699,7 +5698,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 					// Set the node id on all the disconnected allocs to the node under test.
 					alloc.NodeID = testNode.ID
 					alloc.NodeName = "disconnected"
-
 					disconnectedAllocCount--
 				}
 			}
@ -5791,6 +5789,12 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
 // Tests that a client disconnect while a canary is in progress generates the result.
 func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {

+	disconnectAllocState := []*structs.AllocState{{
+		Field: structs.AllocStateFieldClientStatus,
+		Value: structs.AllocClientStatusUnknown,
+		Time:  time.Now(),
+	}}
+
 	type testCase struct {
 		name            string
 		nodes           []string
@ -5883,7 +5887,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
 					updatedJob.TaskGroups[0].Name: {
 						Place:  3,
 						Canary: 0,
-						Ignore: 3,
+						Ignore: 6,
 					},
 				},
 			},
@ -5947,7 +5951,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
 					updatedJob.TaskGroups[0].Name: {
 						Place:  2,
 						Canary: 0,
-						Ignore: 4,
+						Ignore: 7,
 					},
 				},
 			},
@ -6013,7 +6017,7 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
 					updatedJob.TaskGroups[0].Name: {
 						Place:  2,
 						Canary: 0,
-						Ignore: 3,
+						Ignore: 6,
 						// The 2 stops in this test are transient failures, but
 						// the deployment can still progress. We don't include
 						// them in the stop count since DesiredTGUpdates is used
@ -6083,6 +6087,12 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
 					if alloc.ClientStatus == structs.AllocClientStatusRunning {
 						alloc.DeploymentStatus.Healthy = pointer.Of(true)
 					}
+
+					if alloc.ClientStatus == structs.AllocClientStatusUnknown {
+						alloc.AllocStates = disconnectAllocState
+						alloc.FollowupEvalID = "eval-where-it-was-set-to-unknow"
+					}
+
 					tc.deploymentState.PlacedCanaries = append(tc.deploymentState.PlacedCanaries, alloc.ID)
 					handled[alloc.ID] = allocUpdateFnIgnore
 					canariesConfigured++
--- a/scheduler/reconcile_util.go
+++ b/scheduler/reconcile_util.go
@ -9,6 +9,7 @@ package scheduler
 // all scheduler types before moving it into util.go

 import (
+	"errors"
 	"fmt"
 	"sort"
 	"strings"
@ -264,10 +265,11 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS

 		taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID]
 		if taintedNode != nil {
-			// Group disconnecting/reconnecting
+			// Group disconnecting
 			switch taintedNode.Status {
 			case structs.NodeStatusDisconnected:
 				if supportsDisconnectedClients {
+
 					// Filter running allocs on a node that is disconnected to be marked as unknown.
 					if alloc.ClientStatus == structs.AllocClientStatusRunning {
 						disconnecting[alloc.ID] = alloc
@ -289,6 +291,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 						lost[alloc.ID] = alloc
 						continue
 					}
+
 					reconnecting[alloc.ID] = alloc
 					continue
 				}
@ -296,9 +299,16 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			}
 		}

-		// Terminal allocs, if not reconnect, are always untainted as they
-		// should never be migrated.
 		if alloc.TerminalStatus() && !reconnect {
+			// Terminal allocs, if supportsDisconnectedClient and not reconnect,
+			// are probably stopped replacements and should be ignored
+			if supportsDisconnectedClients {
+				ignore[alloc.ID] = alloc
+				continue
+			}
+
+			// Terminal allocs, if not reconnect, are always untainted as they
+			// should never be migrated.
 			untainted[alloc.ID] = alloc
 			continue
 		}
@ -315,11 +325,11 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 			continue
 		}

-		// Ignore unknown allocs that we want to reconnect eventually.
+		// Acknowledge unknown allocs that we want to reconnect eventually.
 		if supportsDisconnectedClients &&
 			alloc.ClientStatus == structs.AllocClientStatusUnknown &&
 			alloc.DesiredStatus == structs.AllocDesiredStatusRun {
-			ignore[alloc.ID] = alloc
+			untainted[alloc.ID] = alloc
 			continue
 		}

@ -366,12 +376,11 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
 // untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled
 // at a future time are also returned so that we can create follow up evaluations for them. Allocs are
 // skipped or considered untainted according to logic defined in shouldFilter method.
-func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time.Time, evalID string, deployment *structs.Deployment) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
-	untainted = make(map[string]*structs.Allocation)
-	rescheduleNow = make(map[string]*structs.Allocation)
+func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time.Time, evalID string, deployment *structs.Deployment) (allocSet, allocSet, []*delayedRescheduleInfo) {
+	untainted := make(map[string]*structs.Allocation)
+	rescheduleNow := make(map[string]*structs.Allocation)
+	rescheduleLater := []*delayedRescheduleInfo{}

-	// When filtering disconnected sets, the untainted set is never populated.
-	// It has no purpose in that context.
 	for _, alloc := range a {
 		// Ignore disconnecting allocs that are already unknown. This can happen
 		// in the case of canaries that are interrupted by a disconnect.
@ -393,25 +402,27 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time
 		if isUntainted && !isDisconnecting {
 			untainted[alloc.ID] = alloc
 		}
-		if isUntainted || ignore {
+
+		if ignore {
 			continue
 		}

-		// Only failed allocs with desired state run get to this point
-		// If the failed alloc is not eligible for rescheduling now we
-		// add it to the untainted set. Disconnecting delay evals are
-		// handled by allocReconciler.createTimeoutLaterEvals
 		eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID, deployment, isDisconnecting)
-		if !isDisconnecting && !eligibleNow {
-			untainted[alloc.ID] = alloc
-			if eligibleLater {
-				rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, alloc, rescheduleTime})
-			}
-		} else {
+		if eligibleNow {
 			rescheduleNow[alloc.ID] = alloc
+			continue
 		}
+
+		// If the failed alloc is not eligible for rescheduling now we
+		// add it to the untainted set.
+		untainted[alloc.ID] = alloc
+
+		if eligibleLater {
+			rescheduleLater = append(rescheduleLater, &delayedRescheduleInfo{alloc.ID, alloc, rescheduleTime})
+		}
+
 	}
-	return
+	return untainted, rescheduleNow, rescheduleLater
 }

 // shouldFilter returns whether the alloc should be ignored or considered untainted.
@ -436,32 +447,31 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
 			if alloc.RanSuccessfully() {
 				return true, false
 			}
+
 			return false, true
 		case structs.AllocDesiredStatusEvict:
 			return false, true
-		default:
 		}

 		switch alloc.ClientStatus {
 		case structs.AllocClientStatusFailed:
-		default:
-			return true, false
+			return false, false
 		}
-		return false, false
+
+		return true, false
 	}

 	// Handle service jobs
 	switch alloc.DesiredStatus {
 	case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
 		return false, true
-	default:
 	}

 	switch alloc.ClientStatus {
 	case structs.AllocClientStatusComplete, structs.AllocClientStatusLost:
 		return false, true
-	default:
 	}
+
 	return false, false
 }

@ -481,9 +491,15 @@ func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID stri

 	// Reschedule if the eval ID matches the alloc's followup evalID or if its close to its reschedule time
 	var eligible bool
-	if isDisconnecting {
-		rescheduleTime, eligible = alloc.NextRescheduleTimeByFailTime(now)
-	} else {
+	switch {
+	case isDisconnecting:
+		rescheduleTime, eligible = alloc.NextRescheduleTimeByTime(now)
+
+	case alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.FollowupEvalID == evalID:
+		lastDisconnectTime := alloc.LastUnknown()
+		rescheduleTime, eligible = alloc.NextRescheduleTimeByTime(lastDisconnectTime)
+
+	default:
 		rescheduleTime, eligible = alloc.NextRescheduleTime()
 	}

@ -491,9 +507,11 @@ func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID stri
 		rescheduleNow = true
 		return
 	}
-	if eligible && alloc.FollowupEvalID == "" {
+
+	if eligible && (alloc.FollowupEvalID == "" || isDisconnecting) {
 		rescheduleLater = true
 	}
+
 	return
 }

@ -547,12 +565,13 @@ func (a allocSet) delayByStopAfterClientDisconnect() (later []*delayedReschedule

 // delayByMaxClientDisconnect returns a delay for any unknown allocation
 // that's got a max_client_reconnect configured
-func (a allocSet) delayByMaxClientDisconnect(now time.Time) (later []*delayedRescheduleInfo, err error) {
+func (a allocSet) delayByMaxClientDisconnect(now time.Time) ([]*delayedRescheduleInfo, error) {
+	var later []*delayedRescheduleInfo
+
 	for _, alloc := range a {
 		timeout := alloc.DisconnectTimeout(now)
-
 		if !timeout.After(now) {
-			continue
+			return nil, errors.New("unable to computing disconnecting timeouts")
 		}

 		later = append(later, &delayedRescheduleInfo{
@ -562,7 +581,19 @@ func (a allocSet) delayByMaxClientDisconnect(now time.Time) (later []*delayedRes
 		})
 	}

-	return
+	return later, nil
+}
+
+// filterOutByClientStatus returns all allocs from the set without the specified client status.
+func (a allocSet) filterOutByClientStatus(clientStatus string) allocSet {
+	allocs := make(allocSet)
+	for _, alloc := range a {
+		if alloc.ClientStatus != clientStatus {
+			allocs[alloc.ID] = alloc
+		}
+	}
+
+	return allocs
 }

 // filterByClientStatus returns allocs from the set with the specified client status.
--- a/scheduler/reconcile_util_test.go
+++ b/scheduler/reconcile_util_test.go
@ -237,7 +237,6 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				},
 			},
 		},
-
 		{
 			name:                        "disco-client-disconnect-unset-max-disconnect",
 			supportsDisconnectedClients: true,
@ -273,7 +272,6 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				},
 			},
 		},
-
 		// Everything below this line tests the disconnected client mode.
 		{
 			name:                        "disco-client-untainted-reconnect-failed-and-replaced",
@ -381,10 +379,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			taintedNodes:                nodes,
 			skipNilNodeTest:             false,
 			all: allocSet{
-				// Allocs on reconnected nodes that are complete are untainted
-				"untainted-reconnect-complete": {
-					ID:            "untainted-reconnect-complete",
-					Name:          "untainted-reconnect-complete",
+				// Allocs on reconnected nodes that are complete are ignored
+				"ignored-reconnect-complete": {
+					ID:            "ignored-reconnect-complete",
+					Name:          "ignored-reconnect-complete",
 					ClientStatus:  structs.AllocClientStatusComplete,
 					DesiredStatus: structs.AllocDesiredStatusRun,
 					Job:           testJob,
@ -405,9 +403,9 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					AllocStates:   unknownAllocState,
 				},
 				// Lost allocs on reconnected nodes don't get restarted
-				"untainted-reconnect-lost": {
-					ID:            "untainted-reconnect-lost",
-					Name:          "untainted-reconnect-lost",
+				"ignored-reconnect-lost": {
+					ID:            "ignored-reconnect-lost",
+					Name:          "ignored-reconnect-lost",
 					ClientStatus:  structs.AllocClientStatusLost,
 					DesiredStatus: structs.AllocDesiredStatusStop,
 					Job:           testJob,
@ -415,10 +413,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					TaskGroup:     "web",
 					AllocStates:   unknownAllocState,
 				},
-				// Replacement allocs that are complete are untainted
-				"untainted-reconnect-complete-replacement": {
-					ID:                 "untainted-reconnect-complete-replacement",
-					Name:               "untainted-reconnect-complete",
+				// Replacement allocs that are complete are ignored
+				"ignored-reconnect-complete-replacement": {
+					ID:                 "ignored-reconnect-complete-replacement",
+					Name:               "ignored-reconnect-complete",
 					ClientStatus:       structs.AllocClientStatusComplete,
 					DesiredStatus:      structs.AllocDesiredStatusRun,
 					Job:                testJob,
@ -427,10 +425,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					AllocStates:        unknownAllocState,
 					PreviousAllocation: "untainted-reconnect-complete",
 				},
-				// Replacement allocs on reconnected nodes that are failed are untainted
-				"untainted-reconnect-failed-replacement": {
-					ID:                 "untainted-reconnect-failed-replacement",
-					Name:               "untainted-reconnect-failed",
+				// Replacement allocs on reconnected nodes that are failed are ignored
+				"ignored-reconnect-failed-replacement": {
+					ID:                 "ignored-reconnect-failed-replacement",
+					Name:               "ignored-reconnect-failed",
 					ClientStatus:       structs.AllocClientStatusFailed,
 					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
@ -439,63 +437,9 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					PreviousAllocation: "reconnecting-failed",
 				},
 				// Lost replacement allocs on reconnected nodes don't get restarted
-				"untainted-reconnect-lost-replacement": {
-					ID:                 "untainted-reconnect-lost-replacement",
-					Name:               "untainted-reconnect-lost",
-					ClientStatus:       structs.AllocClientStatusLost,
-					DesiredStatus:      structs.AllocDesiredStatusStop,
-					Job:                testJob,
-					NodeID:             "normal",
-					TaskGroup:          "web",
-					AllocStates:        unknownAllocState,
-					PreviousAllocation: "untainted-reconnect-lost",
-				},
-			},
-			untainted: allocSet{
-				"untainted-reconnect-complete": {
-					ID:            "untainted-reconnect-complete",
-					Name:          "untainted-reconnect-complete",
-					ClientStatus:  structs.AllocClientStatusComplete,
-					DesiredStatus: structs.AllocDesiredStatusRun,
-					Job:           testJob,
-					NodeID:        "normal",
-					TaskGroup:     "web",
-					AllocStates:   unknownAllocState,
-				},
-				"untainted-reconnect-lost": {
-					ID:            "untainted-reconnect-lost",
-					Name:          "untainted-reconnect-lost",
-					ClientStatus:  structs.AllocClientStatusLost,
-					DesiredStatus: structs.AllocDesiredStatusStop,
-					Job:           testJob,
-					NodeID:        "normal",
-					TaskGroup:     "web",
-					AllocStates:   unknownAllocState,
-				},
-				"untainted-reconnect-complete-replacement": {
-					ID:                 "untainted-reconnect-complete-replacement",
-					Name:               "untainted-reconnect-complete",
-					ClientStatus:       structs.AllocClientStatusComplete,
-					DesiredStatus:      structs.AllocDesiredStatusRun,
-					Job:                testJob,
-					NodeID:             "normal",
-					TaskGroup:          "web",
-					AllocStates:        unknownAllocState,
-					PreviousAllocation: "untainted-reconnect-complete",
-				},
-				"untainted-reconnect-failed-replacement": {
-					ID:                 "untainted-reconnect-failed-replacement",
-					Name:               "untainted-reconnect-failed",
-					ClientStatus:       structs.AllocClientStatusFailed,
-					DesiredStatus:      structs.AllocDesiredStatusStop,
-					Job:                testJob,
-					NodeID:             "normal",
-					TaskGroup:          "web",
-					PreviousAllocation: "reconnecting-failed",
-				},
-				"untainted-reconnect-lost-replacement": {
-					ID:                 "untainted-reconnect-lost-replacement",
-					Name:               "untainted-reconnect-lost",
+				"ignored-reconnect-lost-replacement": {
+					ID:                 "ignored-reconnect-lost-replacement",
+					Name:               "ignored-reconnect-lost",
 					ClientStatus:       structs.AllocClientStatusLost,
 					DesiredStatus:      structs.AllocDesiredStatusStop,
 					Job:                testJob,
@ -505,6 +449,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					PreviousAllocation: "untainted-reconnect-lost",
 				},
 			},
+			untainted:     allocSet{},
 			migrate:       allocSet{},
 			disconnecting: allocSet{},
 			reconnecting: allocSet{
@ -519,8 +464,62 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					AllocStates:   unknownAllocState,
 				},
 			},
-			ignore: allocSet{},
-			lost:   allocSet{},
+			ignore: allocSet{
+
+				"ignored-reconnect-complete": {
+					ID:            "ignored-reconnect-complete",
+					Name:          "ignored-reconnect-complete",
+					ClientStatus:  structs.AllocClientStatusComplete,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+				},
+				"ignored-reconnect-lost": {
+					ID:            "ignored-reconnect-lost",
+					Name:          "ignored-reconnect-lost",
+					ClientStatus:  structs.AllocClientStatusLost,
+					DesiredStatus: structs.AllocDesiredStatusStop,
+					Job:           testJob,
+					NodeID:        "normal",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+				},
+				"ignored-reconnect-complete-replacement": {
+					ID:                 "ignored-reconnect-complete-replacement",
+					Name:               "ignored-reconnect-complete",
+					ClientStatus:       structs.AllocClientStatusComplete,
+					DesiredStatus:      structs.AllocDesiredStatusRun,
+					Job:                testJob,
+					NodeID:             "normal",
+					TaskGroup:          "web",
+					AllocStates:        unknownAllocState,
+					PreviousAllocation: "untainted-reconnect-complete",
+				},
+				"ignored-reconnect-failed-replacement": {
+					ID:                 "ignored-reconnect-failed-replacement",
+					Name:               "ignored-reconnect-failed",
+					ClientStatus:       structs.AllocClientStatusFailed,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
+					Job:                testJob,
+					NodeID:             "normal",
+					TaskGroup:          "web",
+					PreviousAllocation: "reconnecting-failed",
+				},
+				"ignored-reconnect-lost-replacement": {
+					ID:                 "ignored-reconnect-lost-replacement",
+					Name:               "ignored-reconnect-lost",
+					ClientStatus:       structs.AllocClientStatusLost,
+					DesiredStatus:      structs.AllocDesiredStatusStop,
+					Job:                testJob,
+					NodeID:             "normal",
+					TaskGroup:          "web",
+					AllocStates:        unknownAllocState,
+					PreviousAllocation: "untainted-reconnect-lost",
+				},
+			},
+			lost: allocSet{},
 		},
 		{
 			name:                        "disco-client-disconnect",
@ -539,10 +538,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					NodeID:        "disconnected",
 					TaskGroup:     "web",
 				},
-				// Unknown allocs on disconnected nodes are ignored
-				"ignore-unknown": {
-					ID:            "ignore-unknown",
-					Name:          "ignore-unknown",
+				// Unknown allocs on disconnected nodes are acknowledge, so they wont be rescheduled again
+				"untainted-unknown": {
+					ID:            "untainted-unknown",
+					Name:          "untainted-unknown",
 					ClientStatus:  structs.AllocClientStatusUnknown,
 					DesiredStatus: structs.AllocDesiredStatusRun,
 					Job:           testJob,
@ -595,8 +594,20 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 					AllocStates:   unknownAllocState,
 				},
 			},
-			untainted: allocSet{},
-			migrate:   allocSet{},
+			untainted: allocSet{
+				// Unknown allocs on disconnected nodes are acknowledge, so they wont be rescheduled again
+				"untainted-unknown": {
+					ID:            "untainted-unknown",
+					Name:          "untainted-unknown",
+					ClientStatus:  structs.AllocClientStatusUnknown,
+					DesiredStatus: structs.AllocDesiredStatusRun,
+					Job:           testJob,
+					NodeID:        "disconnected",
+					TaskGroup:     "web",
+					AllocStates:   unknownAllocState,
+				},
+			},
+			migrate: allocSet{},
 			disconnecting: allocSet{
 				"disconnect-running": {
 					ID:            "disconnect-running",
@ -610,17 +621,6 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			},
 			reconnecting: allocSet{},
 			ignore: allocSet{
-				// Unknown allocs on disconnected nodes are ignored
-				"ignore-unknown": {
-					ID:            "ignore-unknown",
-					Name:          "ignore-unknown",
-					ClientStatus:  structs.AllocClientStatusUnknown,
-					DesiredStatus: structs.AllocDesiredStatusRun,
-					Job:           testJob,
-					NodeID:        "disconnected",
-					TaskGroup:     "web",
-					AllocStates:   unknownAllocState,
-				},
 				"ignore-reconnected-failed-stopped": {
 					ID:            "ignore-reconnected-failed-stopped",
 					Name:          "ignore-reconnected-failed-stopped",
@ -1167,3 +1167,202 @@ func Test_allocNameIndex_Next(t *testing.T) {
 		})
 	}
 }
+
+func TestAllocSet_filterByRescheduleable(t *testing.T) {
+	ci.Parallel(t)
+
+	noRescheduleJob := mock.Job()
+	noRescheduleTG := &structs.TaskGroup{
+		Name: "noRescheduleTG",
+		ReschedulePolicy: &structs.ReschedulePolicy{
+			Attempts:  0,
+			Unlimited: false,
+		},
+	}
+
+	noRescheduleJob.TaskGroups[0] = noRescheduleTG
+
+	testJob := mock.Job()
+	rescheduleTG := &structs.TaskGroup{
+		Name: "rescheduleTG",
+		ReschedulePolicy: &structs.ReschedulePolicy{
+			Attempts:  1,
+			Unlimited: false,
+		},
+	}
+	testJob.TaskGroups[0] = rescheduleTG
+
+	now := time.Now()
+
+	type testCase struct {
+		name                        string
+		all                         allocSet
+		isBatch                     bool
+		supportsDisconnectedClients bool
+		isDisconnecting             bool
+		deployment                  *structs.Deployment
+
+		// expected results
+		untainted allocSet
+		resNow    allocSet
+		resLater  []*delayedRescheduleInfo
+	}
+
+	testCases := []testCase{
+		{
+			name:            "batch disconnecting allocation no reschedule",
+			isDisconnecting: true,
+			isBatch:         true,
+			all: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			untainted: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			resNow:   allocSet{},
+			resLater: []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "batch ignore unknown disconnecting allocs",
+			isDisconnecting: true,
+			isBatch:         true,
+			all: allocSet{
+				"disconnecting1": {
+					ID:           "disconnection1",
+					ClientStatus: structs.AllocClientStatusUnknown,
+					Job:          testJob,
+				},
+			},
+			untainted: allocSet{},
+			resNow:    allocSet{},
+			resLater:  []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "batch disconnecting allocation reschedule",
+			isDisconnecting: true,
+			isBatch:         true,
+			all: allocSet{
+				"rescheduleNow1": {
+					ID:           "rescheduleNow1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          testJob,
+					TaskGroup:    "rescheduleTG",
+				},
+			},
+			untainted: allocSet{},
+			resNow: allocSet{
+				"rescheduleNow1": {
+					ID:           "rescheduleNow1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          testJob,
+					TaskGroup:    "rescheduleTG",
+				},
+			},
+			resLater: []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "service disconnecting allocation no reschedule",
+			isDisconnecting: true,
+			isBatch:         false,
+			all: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			untainted: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			resNow:   allocSet{},
+			resLater: []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "service disconnecting allocation reschedule",
+			isDisconnecting: true,
+			isBatch:         false,
+			all: allocSet{
+				"rescheduleNow1": {
+					ID:           "rescheduleNow1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          testJob,
+					TaskGroup:    "rescheduleTG",
+				},
+			},
+			untainted: allocSet{},
+			resNow: allocSet{
+				"rescheduleNow1": {
+					ID:           "rescheduleNow1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          testJob,
+					TaskGroup:    "rescheduleTG",
+				},
+			},
+			resLater: []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "service ignore unknown disconnecting allocs",
+			isDisconnecting: true,
+			isBatch:         false,
+			all: allocSet{
+				"disconnecting1": {
+					ID:           "disconnection1",
+					ClientStatus: structs.AllocClientStatusUnknown,
+					Job:          testJob,
+				},
+			},
+			untainted: allocSet{},
+			resNow:    allocSet{},
+			resLater:  []*delayedRescheduleInfo{},
+		},
+		{
+			name:            "service running allocation no reschedule",
+			isDisconnecting: false,
+			isBatch:         true,
+			all: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			untainted: allocSet{
+				"untainted1": {
+					ID:           "untainted1",
+					ClientStatus: structs.AllocClientStatusRunning,
+					Job:          noRescheduleJob,
+					TaskGroup:    "noRescheduleTG",
+				},
+			},
+			resNow:   allocSet{},
+			resLater: []*delayedRescheduleInfo{},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			untainted, resNow, resLater := tc.all.filterByRescheduleable(tc.isBatch,
+				tc.isDisconnecting, now, "evailID", tc.deployment)
+			must.Eq(t, tc.untainted, untainted, must.Sprintf("with-nodes: untainted"))
+			must.Eq(t, tc.resNow, resNow, must.Sprintf("with-nodes: reschedule-now"))
+			must.Eq(t, tc.resLater, resLater, must.Sprintf("with-nodes: rescheduleLater"))
+		})
+	}
+}