Set Reschedule from deployment watcher
This commit is contained in:
parent
a510774451
commit
8a81038cdb
|
@ -214,6 +214,10 @@ type DesiredTransition struct {
|
|||
// Migrate is used to indicate that this allocation should be stopped and
|
||||
// migrated to another node.
|
||||
Migrate *bool
|
||||
|
||||
// Reschedule is used to indicate that this allocation is eligible to be
|
||||
// rescheduled.
|
||||
Reschedule *bool
|
||||
}
|
||||
|
||||
// ShouldMigrate returns whether the transition object dictates a migration.
|
||||
|
|
|
@ -22,6 +22,15 @@ const (
|
|||
perJobEvalBatchPeriod = 1 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
// allowRescheduleTransistion is the transistion that allows failed
|
||||
// allocations part of a deployment to be rescheduled. We create a one off
|
||||
// variable to avoid creating a new object for every request.
|
||||
allowRescheduleTransistion = &structs.DesiredTransition{
|
||||
Reschedule: helper.BoolToPtr(true),
|
||||
}
|
||||
)
|
||||
|
||||
// deploymentTriggers are the set of functions required to trigger changes on
|
||||
// behalf of a deployment
|
||||
type deploymentTriggers interface {
|
||||
|
@ -70,9 +79,13 @@ type deploymentWatcher struct {
|
|||
j *structs.Job
|
||||
|
||||
// outstandingBatch marks whether an outstanding function exists to create
|
||||
// the evaluation. Access should be done through the lock
|
||||
// the evaluation. Access should be done through the lock.
|
||||
outstandingBatch bool
|
||||
|
||||
// outstandingAllowReplacements is the map of allocations that will be
|
||||
// marked as allowing a replacement. Access should be done through the lock.
|
||||
outstandingAllowReplacements map[string]*structs.DesiredTransition
|
||||
|
||||
// latestEval is the latest eval for the job. It is updated by the watch
|
||||
// loop and any time an evaluation is created. The field should be accessed
|
||||
// by holding the lock or using the setter and getter methods.
|
||||
|
@ -429,8 +442,8 @@ FAIL:
|
|||
}
|
||||
|
||||
// Create an eval to push the deployment along
|
||||
if res.createEval {
|
||||
w.createEvalBatched(allocIndex)
|
||||
if res.createEval || len(res.allowReplacements) != 0 {
|
||||
w.createBatchedUpdate(res.allowReplacements, allocIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -472,9 +485,10 @@ FAIL:
|
|||
// allocUpdateResult is used to return the desired actions given the newest set
|
||||
// of allocations for the deployment.
|
||||
type allocUpdateResult struct {
|
||||
createEval bool
|
||||
failDeployment bool
|
||||
rollback bool
|
||||
createEval bool
|
||||
failDeployment bool
|
||||
rollback bool
|
||||
allowReplacements []string
|
||||
}
|
||||
|
||||
// handleAllocUpdate is used to compute the set of actions to take based on the
|
||||
|
@ -504,13 +518,18 @@ func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (
|
|||
continue
|
||||
}
|
||||
|
||||
// Determine if the update stanza for this group is progress based
|
||||
progressBased := dstate.ProgressDeadline != 0
|
||||
|
||||
// We need to create an eval so the job can progress.
|
||||
if alloc.DeploymentStatus.IsHealthy() {
|
||||
res.createEval = true
|
||||
} else if progressBased && alloc.DeploymentStatus.IsUnhealthy() && deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
|
||||
res.allowReplacements = append(res.allowReplacements, alloc.ID)
|
||||
}
|
||||
|
||||
// If the group is using a deadline, we don't have to do anything.
|
||||
if dstate.ProgressDeadline != 0 {
|
||||
// If the group is using a progress deadline, we don't have to do anything.
|
||||
if progressBased {
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -601,12 +620,21 @@ func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
|
|||
return stable, nil
|
||||
}
|
||||
|
||||
// createEvalBatched creates an eval but batches calls together
|
||||
func (w *deploymentWatcher) createEvalBatched(forIndex uint64) {
|
||||
// createBatchedUpdate creates an eval for the given index as well as updating
|
||||
// the given allocations to allow them to reschedule.
|
||||
func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
|
||||
w.l.Lock()
|
||||
defer w.l.Unlock()
|
||||
|
||||
if w.outstandingBatch || forIndex < w.latestEval {
|
||||
// Store the allocations that can be replaced
|
||||
for _, allocID := range allowReplacements {
|
||||
if w.outstandingAllowReplacements == nil {
|
||||
w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
|
||||
}
|
||||
w.outstandingAllowReplacements[allocID] = allowRescheduleTransistion
|
||||
}
|
||||
|
||||
if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -621,17 +649,18 @@ func (w *deploymentWatcher) createEvalBatched(forIndex uint64) {
|
|||
default:
|
||||
}
|
||||
|
||||
w.l.Lock()
|
||||
replacements := w.outstandingAllowReplacements
|
||||
w.outstandingAllowReplacements = nil
|
||||
w.outstandingBatch = false
|
||||
w.l.Unlock()
|
||||
|
||||
// Create the eval
|
||||
if index, err := w.createUpdate(nil, w.getEval()); err != nil {
|
||||
if index, err := w.createUpdate(replacements, w.getEval()); err != nil {
|
||||
w.logger.Printf("[ERR] nomad.deployment_watcher: failed to create evaluation for deployment %q: %v", w.deploymentID, err)
|
||||
} else {
|
||||
w.setLatestEval(index)
|
||||
}
|
||||
|
||||
w.l.Lock()
|
||||
w.outstandingBatch = false
|
||||
w.l.Unlock()
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -5588,6 +5588,13 @@ type DesiredTransition struct {
|
|||
// Migrate is used to indicate that this allocation should be stopped and
|
||||
// migrated to another node.
|
||||
Migrate *bool
|
||||
|
||||
// Reschedule is used to indicate that this allocation is eligible to be
|
||||
// rescheduled. Most allocations are automatically eligible for
|
||||
// rescheduling, so this field is only required when an allocation is not
|
||||
// automatically eligible. An example is an allocation that is part of a
|
||||
// deployment.
|
||||
Reschedule *bool
|
||||
}
|
||||
|
||||
// Merge merges the two desired transitions, preferring the values from the
|
||||
|
@ -5596,6 +5603,10 @@ func (d *DesiredTransition) Merge(o *DesiredTransition) {
|
|||
if o.Migrate != nil {
|
||||
d.Migrate = o.Migrate
|
||||
}
|
||||
|
||||
if o.Reschedule != nil {
|
||||
d.Reschedule = o.Reschedule
|
||||
}
|
||||
}
|
||||
|
||||
// ShouldMigrate returns whether the transition object dictates a migration.
|
||||
|
@ -5603,6 +5614,12 @@ func (d *DesiredTransition) ShouldMigrate() bool {
|
|||
return d.Migrate != nil && *d.Migrate
|
||||
}
|
||||
|
||||
// ShouldReschedule returns whether the transition object dictates a
|
||||
// rescheduling.
|
||||
func (d *DesiredTransition) ShouldReschedule() bool {
|
||||
return d.Reschedule != nil && *d.Reschedule
|
||||
}
|
||||
|
||||
const (
|
||||
AllocDesiredStatusRun = "run" // Allocation should run
|
||||
AllocDesiredStatusStop = "stop" // Allocation should stop
|
||||
|
@ -6022,6 +6039,7 @@ func (a *Allocation) Stub() *AllocListStub {
|
|||
DesiredDescription: a.DesiredDescription,
|
||||
ClientStatus: a.ClientStatus,
|
||||
ClientDescription: a.ClientDescription,
|
||||
DesiredTransition: a.DesiredTransition,
|
||||
TaskStates: a.TaskStates,
|
||||
DeploymentStatus: a.DeploymentStatus,
|
||||
FollowupEvalID: a.FollowupEvalID,
|
||||
|
@ -6046,6 +6064,7 @@ type AllocListStub struct {
|
|||
DesiredDescription string
|
||||
ClientStatus string
|
||||
ClientDescription string
|
||||
DesiredTransition DesiredTransition
|
||||
TaskStates map[string]*TaskState
|
||||
DeploymentStatus *AllocDeploymentStatus
|
||||
FollowupEvalID string
|
||||
|
|
Loading…
Reference in a new issue