Only use DesiredTransition.Reschedule in reconciler when it's an active deployment
parent 6f92e0711c
commit 5329900f6d
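Previously the reconciler held back any failed alloc that carried a DeploymentID until the deployment watcher set DesiredTransition.Reschedule, even when that deployment had already finished. This change threads the reconciler's deployment (a.deployment) through filterByRescheduleable into updateByReschedulable, so the gate only applies while the alloc's own deployment is still active. A minimal sketch of the new condition, assuming the github.com/hashicorp/nomad/nomad/structs package; the helper name is illustrative and not part of the commit:

// gatedByActiveDeployment mirrors the check updateByReschedulable now performs:
// a failed alloc must wait for an explicit DesiredTransition.Reschedule only
// while it belongs to the deployment being reconciled and that deployment is
// still active.
func gatedByActiveDeployment(alloc *structs.Allocation, d *structs.Deployment) bool {
	return d != nil &&
		alloc.DeploymentID == d.ID && // alloc belongs to this deployment
		d.Active() && // the deployment is still in flight
		!alloc.DesiredTransition.ShouldReschedule()
}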
@@ -340,7 +340,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
 
 	// Determine what set of terminal allocations need to be rescheduled
-	untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, a.now, a.evalID)
+	untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, a.now, a.evalID, a.deployment)
 
 	// Create batched follow up evaluations for allocations that are
 	// reschedulable later and mark the allocations for in place updating
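Note: a.deployment is the deployment handed to NewAllocReconciler when the reconciler is built (the d argument in the new test below), so no additional state is introduced by this change.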
@@ -4063,3 +4063,55 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
 		},
 	})
 }
+
+// Test that a successful deployment with failed allocs will result in
+// rescheduling failed allocations
+func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
+	job := mock.Job()
+	job.TaskGroups[0].Update = noCanaryUpdate
+	tgName := job.TaskGroups[0].Name
+	now := time.Now()
+
+	// Mock a successful deployment with failed allocs
+	d := structs.NewDeployment(job)
+	d.Status = structs.DeploymentStatusSuccessful
+	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
+		Promoted:     false,
+		DesiredTotal: 10,
+		PlacedAllocs: 10,
+	}
+
+	// Create 10 allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		alloc.TaskGroup = job.TaskGroups[0].Name
+		alloc.DeploymentID = d.ID
+		alloc.ClientStatus = structs.AllocClientStatusFailed
+		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
+			StartedAt:  now.Add(-1 * time.Hour),
+			FinishedAt: now.Add(-10 * time.Second)}}
+		allocs = append(allocs, alloc)
+	}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
+	r := reconciler.Compute()
+
+	// Assert that rescheduled placements were created
+	assertResults(t, r, &resultExpectation{
+		place:             10,
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:  10,
+				Ignore: 0,
+			},
+		},
+	})
+	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
+}
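Because the mocked deployment is DeploymentStatusSuccessful, Deployment.Active() is false and the new gate never applies: all ten failed allocs are placed again even though nothing set their DesiredTransition. A quick illustration of the distinction, reusing the test's job value and assuming structs.Deployment.Active() reports true only for in-flight statuses such as running or paused:

d := structs.NewDeployment(job)
d.Status = structs.DeploymentStatusRunning
// d.Active() == true: failed allocs wait for DesiredTransition.Reschedule

d.Status = structs.DeploymentStatusSuccessful
// d.Active() == false: failed allocs are rescheduled immediately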
@@ -234,7 +234,7 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
 // untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled
 // at a future time are also returned so that we can create follow up evaluations for them. Allocs are
 // skipped or considered untainted according to logic defined in shouldFilter method.
-func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID string) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
+func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID string, deployment *structs.Deployment) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
 	untainted = make(map[string]*structs.Allocation)
 	rescheduleNow = make(map[string]*structs.Allocation)
@@ -257,7 +257,7 @@ func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID str
 
 		// Only failed allocs with desired state run get to this point
 		// If the failed alloc is not eligible for rescheduling now we add it to the untainted set
-		eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID)
+		eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID, deployment)
 		if !eligibleNow {
 			untainted[alloc.ID] = alloc
 			if eligibleLater {
@@ -320,11 +320,14 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
 
 // updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
 // should be rescheduled now, later or left in the untainted set
-func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID string) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
-	// If the allocation is part of a deployment, only allow it to reschedule if
-	// it has been marked eligible for it explicitly.
-	if alloc.DeploymentID != "" && !alloc.DesiredTransition.ShouldReschedule() {
+func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID string, d *structs.Deployment) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
+	// If the allocation is part of an ongoing active deployment, we only allow it to reschedule
+	// if it has been marked eligible
+	if d != nil && alloc.DeploymentID == d.ID && d.Active() && !alloc.DesiredTransition.ShouldReschedule() {
 		return
 	}
 
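For completeness: while a deployment is active, the only way a failed alloc gets replaced is the explicit transition set by the deployment watcher. A hypothetical illustration of that marking, assuming structs.DesiredTransition (with its Reschedule *bool field) and helper.BoolToPtr from github.com/hashicorp/nomad/helper; this is not code from the commit:

// Mark the alloc eligible; the reconciler's active-deployment gate now lets it
// fall through to the normal reschedule-policy checks in updateByReschedulable.
alloc.DesiredTransition = structs.DesiredTransition{
	Reschedule: helper.BoolToPtr(true),
}
// alloc.DesiredTransition.ShouldReschedule() == true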