diff --git a/api/allocations.go b/api/allocations.go index dac3e2bd2..f3162ea44 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -214,6 +214,10 @@ type DesiredTransition struct { // Migrate is used to indicate that this allocation should be stopped and // migrated to another node. Migrate *bool + + // Reschedule is used to indicate that this allocation is eligible to be + // rescheduled. + Reschedule *bool } // ShouldMigrate returns whether the transition object dictates a migration. diff --git a/nomad/deploymentwatcher/deployment_watcher.go b/nomad/deploymentwatcher/deployment_watcher.go index 9f5696a8e..a858ccc77 100644 --- a/nomad/deploymentwatcher/deployment_watcher.go +++ b/nomad/deploymentwatcher/deployment_watcher.go @@ -22,6 +22,15 @@ const ( perJobEvalBatchPeriod = 1 * time.Second ) +var ( + // allowRescheduleTransistion is the transition that allows failed + // allocations that are part of a deployment to be rescheduled. We create a one-off + // variable to avoid creating a new object for every request. + allowRescheduleTransistion = &structs.DesiredTransition{ + Reschedule: helper.BoolToPtr(true), + } +) + // deploymentTriggers are the set of functions required to trigger changes on // behalf of a deployment type deploymentTriggers interface { @@ -70,9 +79,13 @@ type deploymentWatcher struct { j *structs.Job // outstandingBatch marks whether an outstanding function exists to create - // the evaluation. Access should be done through the lock + // the evaluation. Access should be done through the lock. outstandingBatch bool + // outstandingAllowReplacements is the map of allocations that will be + // marked as allowing a replacement. Access should be done through the lock. + outstandingAllowReplacements map[string]*structs.DesiredTransition + // latestEval is the latest eval for the job. It is updated by the watch // loop and any time an evaluation is created. The field should be accessed // by holding the lock or using the setter and getter methods. 
@@ -429,8 +442,8 @@ FAIL: } // Create an eval to push the deployment along - if res.createEval { - w.createEvalBatched(allocIndex) + if res.createEval || len(res.allowReplacements) != 0 { + w.createBatchedUpdate(res.allowReplacements, allocIndex) } } } @@ -472,9 +485,10 @@ FAIL: // allocUpdateResult is used to return the desired actions given the newest set // of allocations for the deployment. type allocUpdateResult struct { - createEval bool - failDeployment bool - rollback bool + createEval bool + failDeployment bool + rollback bool + allowReplacements []string } // handleAllocUpdate is used to compute the set of actions to take based on the @@ -504,13 +518,18 @@ func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) ( continue } + // Determine if the update stanza for this group is progress based + progressBased := dstate.ProgressDeadline != 0 + // We need to create an eval so the job can progress. if alloc.DeploymentStatus.IsHealthy() { res.createEval = true + } else if progressBased && alloc.DeploymentStatus.IsUnhealthy() && deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() { + res.allowReplacements = append(res.allowReplacements, alloc.ID) } - // If the group is using a deadline, we don't have to do anything. - if dstate.ProgressDeadline != 0 { + // If the group is using a progress deadline, we don't have to do anything. + if progressBased { continue } @@ -601,12 +620,21 @@ func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) { return stable, nil } -// createEvalBatched creates an eval but batches calls together -func (w *deploymentWatcher) createEvalBatched(forIndex uint64) { +// createBatchedUpdate creates an eval for the given index as well as updating +// the given allocations to allow them to reschedule. 
+func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) { w.l.Lock() defer w.l.Unlock() - if w.outstandingBatch || forIndex < w.latestEval { + // Store the allocations that can be replaced + for _, allocID := range allowReplacements { + if w.outstandingAllowReplacements == nil { + w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements)) + } + w.outstandingAllowReplacements[allocID] = allowRescheduleTransistion + } + + if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) { return } @@ -621,17 +649,18 @@ func (w *deploymentWatcher) createEvalBatched(forIndex uint64) { default: } + w.l.Lock() + replacements := w.outstandingAllowReplacements + w.outstandingAllowReplacements = nil + w.outstandingBatch = false + w.l.Unlock() + // Create the eval - if index, err := w.createUpdate(nil, w.getEval()); err != nil { + if index, err := w.createUpdate(replacements, w.getEval()); err != nil { w.logger.Printf("[ERR] nomad.deployment_watcher: failed to create evaluation for deployment %q: %v", w.deploymentID, err) } else { w.setLatestEval(index) } - - w.l.Lock() - w.outstandingBatch = false - w.l.Unlock() - }) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2bd32a4bc..fda47f1aa 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5588,6 +5588,13 @@ type DesiredTransition struct { // Migrate is used to indicate that this allocation should be stopped and // migrated to another node. Migrate *bool + + // Reschedule is used to indicate that this allocation is eligible to be + // rescheduled. Most allocations are automatically eligible for + // rescheduling, so this field is only required when an allocation is not + // automatically eligible. An example is an allocation that is part of a + // deployment. 
+ Reschedule *bool } // Merge merges the two desired transitions, preferring the values from the @@ -5596,6 +5603,10 @@ func (d *DesiredTransition) Merge(o *DesiredTransition) { if o.Migrate != nil { d.Migrate = o.Migrate } + + if o.Reschedule != nil { + d.Reschedule = o.Reschedule + } } // ShouldMigrate returns whether the transition object dictates a migration. @@ -5603,6 +5614,12 @@ func (d *DesiredTransition) ShouldMigrate() bool { return d.Migrate != nil && *d.Migrate } +// ShouldReschedule returns whether the transition object dictates a +// rescheduling. +func (d *DesiredTransition) ShouldReschedule() bool { + return d.Reschedule != nil && *d.Reschedule +} + const ( AllocDesiredStatusRun = "run" // Allocation should run AllocDesiredStatusStop = "stop" // Allocation should stop @@ -6022,6 +6039,7 @@ func (a *Allocation) Stub() *AllocListStub { DesiredDescription: a.DesiredDescription, ClientStatus: a.ClientStatus, ClientDescription: a.ClientDescription, + DesiredTransition: a.DesiredTransition, TaskStates: a.TaskStates, DeploymentStatus: a.DeploymentStatus, FollowupEvalID: a.FollowupEvalID, @@ -6046,6 +6064,7 @@ type AllocListStub struct { DesiredDescription string ClientStatus string ClientDescription string + DesiredTransition DesiredTransition TaskStates map[string]*TaskState DeploymentStatus *AllocDeploymentStatus FollowupEvalID string