Set Reschedule from deployment watcher

This commit is contained in:
Alex Dadgar 2018-04-06 17:23:35 -07:00 committed by Preetha Appan
parent a510774451
commit 8a81038cdb
No known key found for this signature in database
GPG key ID: 9F7C19990A50EAFC
3 changed files with 69 additions and 17 deletions

View file

@ -214,6 +214,10 @@ type DesiredTransition struct {
// Migrate is used to indicate that this allocation should be stopped and
// migrated to another node.
Migrate *bool
// Reschedule is used to indicate that this allocation is eligible to be
// rescheduled.
Reschedule *bool
}
// ShouldMigrate returns whether the transition object dictates a migration.

View file

@ -22,6 +22,15 @@ const (
perJobEvalBatchPeriod = 1 * time.Second
)
var (
// allowRescheduleTransistion is the transition that allows failed
// allocations that are part of a deployment to be rescheduled. Only the
// Reschedule field is set (to true). We create a one-off variable to avoid
// creating a new object for every request.
// NOTE(review): the same pointer is stored for every allocation queued in
// outstandingAllowReplacements, so it presumably must be treated as
// read-only — confirm no consumer mutates it.
allowRescheduleTransistion = &structs.DesiredTransition{
Reschedule: helper.BoolToPtr(true),
}
)
// deploymentTriggers are the set of functions required to trigger changes on
// behalf of a deployment
type deploymentTriggers interface {
@ -70,9 +79,13 @@ type deploymentWatcher struct {
j *structs.Job
// outstandingBatch marks whether an outstanding function exists to create
// the evaluation. Access should be done through the lock
// the evaluation. Access should be done through the lock.
outstandingBatch bool
// outstandingAllowReplacements is the map of allocations that will be
// marked as allowing a replacement. Access should be done through the lock.
outstandingAllowReplacements map[string]*structs.DesiredTransition
// latestEval is the latest eval for the job. It is updated by the watch
// loop and any time an evaluation is created. The field should be accessed
// by holding the lock or using the setter and getter methods.
@ -429,8 +442,8 @@ FAIL:
}
// Create an eval to push the deployment along
if res.createEval {
w.createEvalBatched(allocIndex)
if res.createEval || len(res.allowReplacements) != 0 {
w.createBatchedUpdate(res.allowReplacements, allocIndex)
}
}
}
@ -472,9 +485,10 @@ FAIL:
// allocUpdateResult is used to return the desired actions given the newest set
// of allocations for the deployment. createEval requests an evaluation to push
// the deployment along, and allowReplacements lists the IDs of allocations
// that should be marked as allowed to reschedule.
// NOTE(review): failDeployment and rollback are not exercised in the visible
// code; presumably they request failing the deployment and rolling the job
// back — confirm against handleAllocUpdate and the watch loop.
type allocUpdateResult struct {
createEval bool
failDeployment bool
rollback bool
createEval bool
failDeployment bool
rollback bool
allowReplacements []string
}
// handleAllocUpdate is used to compute the set of actions to take based on the
@ -504,13 +518,18 @@ func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (
continue
}
// Determine if the update stanza for this group is progress based
progressBased := dstate.ProgressDeadline != 0
// We need to create an eval so the job can progress.
if alloc.DeploymentStatus.IsHealthy() {
res.createEval = true
} else if progressBased && alloc.DeploymentStatus.IsUnhealthy() && deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
res.allowReplacements = append(res.allowReplacements, alloc.ID)
}
// If the group is using a deadline, we don't have to do anything.
if dstate.ProgressDeadline != 0 {
// If the group is using a progress deadline, we don't have to do anything.
if progressBased {
continue
}
@ -601,12 +620,21 @@ func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
return stable, nil
}
// createEvalBatched creates an eval but batches calls together
func (w *deploymentWatcher) createEvalBatched(forIndex uint64) {
// createBatchedUpdate creates an eval for the given index as well as updating
// the given allocations to allow them to reschedule.
func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
w.l.Lock()
defer w.l.Unlock()
if w.outstandingBatch || forIndex < w.latestEval {
// Store the allocations that can be replaced
for _, allocID := range allowReplacements {
if w.outstandingAllowReplacements == nil {
w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
}
w.outstandingAllowReplacements[allocID] = allowRescheduleTransistion
}
if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
return
}
@ -621,17 +649,18 @@ func (w *deploymentWatcher) createEvalBatched(forIndex uint64) {
default:
}
w.l.Lock()
replacements := w.outstandingAllowReplacements
w.outstandingAllowReplacements = nil
w.outstandingBatch = false
w.l.Unlock()
// Create the eval
if index, err := w.createUpdate(nil, w.getEval()); err != nil {
if index, err := w.createUpdate(replacements, w.getEval()); err != nil {
w.logger.Printf("[ERR] nomad.deployment_watcher: failed to create evaluation for deployment %q: %v", w.deploymentID, err)
} else {
w.setLatestEval(index)
}
w.l.Lock()
w.outstandingBatch = false
w.l.Unlock()
})
}

View file

@ -5588,6 +5588,13 @@ type DesiredTransition struct {
// Migrate is used to indicate that this allocation should be stopped and
// migrated to another node.
Migrate *bool
// Reschedule is used to indicate that this allocation is eligible to be
// rescheduled. Most allocations are automatically eligible for
// rescheduling, so this field is only required when an allocation is not
// automatically eligible. An example is an allocation that is part of a
// deployment.
Reschedule *bool
}
// Merge merges the two desired transitions, preferring the values from the
@ -5596,6 +5603,10 @@ func (d *DesiredTransition) Merge(o *DesiredTransition) {
if o.Migrate != nil {
d.Migrate = o.Migrate
}
if o.Reschedule != nil {
d.Reschedule = o.Reschedule
}
}
// ShouldMigrate returns whether the transition object dictates a migration.
@ -5603,6 +5614,12 @@ func (d *DesiredTransition) ShouldMigrate() bool {
return d.Migrate != nil && *d.Migrate
}
// ShouldReschedule reports whether the transition object explicitly marks the
// allocation as eligible for rescheduling. An unset (nil) Reschedule field is
// treated as false.
func (d *DesiredTransition) ShouldReschedule() bool {
	if d.Reschedule == nil {
		return false
	}
	return *d.Reschedule
}
const (
AllocDesiredStatusRun = "run" // Allocation should run
AllocDesiredStatusStop = "stop" // Allocation should stop
@ -6022,6 +6039,7 @@ func (a *Allocation) Stub() *AllocListStub {
DesiredDescription: a.DesiredDescription,
ClientStatus: a.ClientStatus,
ClientDescription: a.ClientDescription,
DesiredTransition: a.DesiredTransition,
TaskStates: a.TaskStates,
DeploymentStatus: a.DeploymentStatus,
FollowupEvalID: a.FollowupEvalID,
@ -6046,6 +6064,7 @@ type AllocListStub struct {
DesiredDescription string
ClientStatus string
ClientDescription string
DesiredTransition DesiredTransition
TaskStates map[string]*TaskState
DeploymentStatus *AllocDeploymentStatus
FollowupEvalID string