Only use DesiredTransition.Reschedule in reconciler when its an active deployment

This commit is contained in:
Preetha Appan 2018-04-17 14:41:36 -05:00
parent 6f92e0711c
commit 5329900f6d
No known key found for this signature in database
GPG Key ID: 9F7C19990A50EAFC
3 changed files with 62 additions and 7 deletions

View File

@ -340,7 +340,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
// Determine what set of terminal allocations need to be rescheduled
untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, a.now, a.evalID)
untainted, rescheduleNow, rescheduleLater := untainted.filterByRescheduleable(a.batch, a.now, a.evalID, a.deployment)
// Create batched follow up evaluations for allocations that are
// reschedulable later and mark the allocations for in place updating

View File

@ -4063,3 +4063,55 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
},
})
}
// Test that a successful deployment with failed allocs will result in
// rescheduling failed allocations
func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
tgName := job.TaskGroups[0].Name
now := time.Now()
// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
d := structs.NewDeployment(job)
d.Status = structs.DeploymentStatusSuccessful
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
PlacedAllocs: 10,
}
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
r := reconciler.Compute()
// Assert that rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 10,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 10,
Ignore: 0,
},
},
})
assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
}

View File

@ -234,7 +234,7 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi
// untainted or a set of allocations that must be rescheduled now. Allocations that can be rescheduled
// at a future time are also returned so that we can create follow up evaluations for them. Allocs are
// skipped or considered untainted according to logic defined in shouldFilter method.
func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID string) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID string, deployment *structs.Deployment) (untainted, rescheduleNow allocSet, rescheduleLater []*delayedRescheduleInfo) {
untainted = make(map[string]*structs.Allocation)
rescheduleNow = make(map[string]*structs.Allocation)
@ -257,7 +257,7 @@ func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID str
// Only failed allocs with desired state run get to this point
// If the failed alloc is not eligible for rescheduling now we add it to the untainted set
eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID)
eligibleNow, eligibleLater, rescheduleTime = updateByReschedulable(alloc, now, evalID, deployment)
if !eligibleNow {
untainted[alloc.ID] = alloc
if eligibleLater {
@ -320,11 +320,14 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
// updateByReschedulable is a helper method that encapsulates logic for whether a failed allocation
// should be rescheduled now, later or left in the untainted set
func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID string) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
func updateByReschedulable(alloc *structs.Allocation, now time.Time, evalID string, d *structs.Deployment) (rescheduleNow, rescheduleLater bool, rescheduleTime time.Time) {
// If the allocation is part of an ongoing active deployment, we only allow it to reschedule
// if it has been marked eligible
if alloc.DeploymentID != "" && d != nil && alloc.DeploymentID == d.ID && d.Active() && !alloc.DesiredTransition.ShouldReschedule() {
return
}
// If the allocation is part of a deployment, only allow it to reschedule if
// it has been marked eligible for it explicitly.
if alloc.DeploymentID != "" && !alloc.DesiredTransition.ShouldReschedule() {
if d != nil && alloc.DeploymentID == d.ID && d.Active() && !alloc.DesiredTransition.ShouldReschedule() {
return
}