27256ebcc6
This PR makes placing new allocations count towards the deployment limit. The limit does not restrict how many new placements are made, but the placements still count against it. This has the nice effect that if you have a group with count = 5 and max_parallel = 1 but only 3 allocs exist for it and a change is made, you will create 2 more at the new version rather than destroying one and dropping to 2 running, as would have happened previously. Fixes https://github.com/hashicorp/nomad/issues/3053
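As a rough sketch of that scenario, using the reconciler API from the file below (the logger, job, deployment, and allocation values are placeholders):

	// Group with count = 5, max_parallel = 1, but only 3 allocs running.
	r := NewAllocReconciler(logger, allocUpdateFn, false /* batch */,
		job.ID, job, deployment, existingAllocs, taintedNodes)
	results := r.Compute()
	// With this change, results asks for the 2 missing placements at the
	// new job version; it no longer stops a healthy alloc to stay under
	// max_parallel, which previously could leave only 2 running.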
716 lines
23 KiB
Go
package scheduler

import (
	"fmt"
	"log"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

// allocUpdateType takes an existing allocation and a new job definition and
// returns whether the allocation can ignore the change, requires a destructive
// update, or can be inplace updated. If it can be inplace updated, an updated
// allocation that has the new resources and alloc metrics attached will be
// returned.
type allocUpdateType func(existing *structs.Allocation, newJob *structs.Job,
	newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation)
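
// As an illustration only (a hypothetical helper, e.g. for tests): a minimal
// allocUpdateType that treats every change as destructive.
var alwaysDestructive allocUpdateType = func(existing *structs.Allocation,
	newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// ignore = false, destructive = true, no in-place update produced
	return false, true, nil
}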

// allocReconciler is used to determine the set of allocations that require
// placement, inplace updating or stopping given the job specification and
// existing cluster state. The reconciler should only be used for batch and
// service jobs.
type allocReconciler struct {
	// logger is used to log debug information. Logging should be kept to a
	// minimum here.
	logger *log.Logger

	// allocUpdateFn is used to check if an allocation can be upgraded in place
	allocUpdateFn allocUpdateType

	// batch marks whether the job is a batch job
	batch bool

	// job is the job being operated on; it may be nil if the job is being
	// stopped via a purge
	job *structs.Job

	// jobID is the ID of the job being operated on. The job may be nil if it
	// is being stopped, so we track the ID separately.
	jobID string

	// oldDeployment is the last deployment for the job
	oldDeployment *structs.Deployment

	// deployment is the current deployment for the job
	deployment *structs.Deployment

	// deploymentPaused marks whether the deployment is paused
	deploymentPaused bool

	// deploymentFailed marks whether the deployment is failed
	deploymentFailed bool

	// taintedNodes contains a map of nodes that are tainted
	taintedNodes map[string]*structs.Node

	// existingAllocs is the set of non-terminal existing allocations
	existingAllocs []*structs.Allocation

	// result is the result of the reconcile. During computation it can be
	// used to store intermediate state
	result *reconcileResults
}

// reconcileResults contains the results of the reconciliation and should be
// applied by the scheduler.
type reconcileResults struct {
	// deployment is the deployment that should be created or updated as a
	// result of scheduling
	deployment *structs.Deployment

	// deploymentUpdates contains a set of deployment updates that should be
	// applied as a result of scheduling
	deploymentUpdates []*structs.DeploymentStatusUpdate

	// place is the set of allocations to place by the scheduler
	place []allocPlaceResult

	// destructiveUpdate is the set of allocations to apply a destructive
	// update to
	destructiveUpdate []allocDestructiveResult

	// inplaceUpdate is the set of allocations to apply an inplace update to
	inplaceUpdate []*structs.Allocation

	// stop is the set of allocations to stop
	stop []allocStopResult

	// desiredTGUpdates captures the desired set of changes to make for each
	// task group.
	desiredTGUpdates map[string]*structs.DesiredUpdates

	// followupEvalWait is set if there should be a followup eval run after the
	// given duration
	followupEvalWait time.Duration
}

func (r *reconcileResults) GoString() string {
	base := fmt.Sprintf("Total changes: (place %d) (destructive %d) (inplace %d) (stop %d)",
		len(r.place), len(r.destructiveUpdate), len(r.inplaceUpdate), len(r.stop))

	if r.deployment != nil {
		base += fmt.Sprintf("\nCreated Deployment: %q", r.deployment.ID)
	}
	for _, u := range r.deploymentUpdates {
		base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q",
			u.DeploymentID, u.Status, u.StatusDescription)
	}
	if r.followupEvalWait != 0 {
		base += fmt.Sprintf("\nFollowup Eval in %v", r.followupEvalWait)
	}
	for tg, u := range r.desiredTGUpdates {
		base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u)
	}
	return base
}

// Changes returns the number of total changes
func (r *reconcileResults) Changes() int {
	return len(r.place) + len(r.inplaceUpdate) + len(r.stop)
}

// NewAllocReconciler creates a new reconciler that should be used to determine
// the changes required to bring the cluster state in line with the declared
// jobspec
func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch bool,
	jobID string, job *structs.Job, deployment *structs.Deployment,
	existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node) *allocReconciler {

	return &allocReconciler{
		logger:         logger,
		allocUpdateFn:  allocUpdateFn,
		batch:          batch,
		jobID:          jobID,
		job:            job,
		deployment:     deployment.Copy(),
		existingAllocs: existingAllocs,
		taintedNodes:   taintedNodes,
		result: &reconcileResults{
			desiredTGUpdates: make(map[string]*structs.DesiredUpdates),
		},
	}
}

// Compute reconciles the existing cluster state and returns the set of changes
// required to converge the job spec and state
func (a *allocReconciler) Compute() *reconcileResults {
	// Create the allocation matrix
	m := newAllocMatrix(a.job, a.existingAllocs)

	// Handle stopping unneeded deployments
	a.cancelDeployments()

	// If we are just stopping a job we do not need to do anything more than
	// stopping all running allocs
	if a.job.Stopped() {
		a.handleStop(m)
		return a.result
	}

	// Detect if the deployment is paused or failed
	if a.deployment != nil {
		a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
		a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
	}

	// Reconcile each group
	complete := true
	for group, as := range m {
		groupComplete := a.computeGroup(group, as)
		complete = complete && groupComplete
	}

	// Mark the deployment as complete if possible
	if a.deployment != nil && complete {
		a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
			DeploymentID:      a.deployment.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		})
	}

	// Set the description of a created deployment
	if d := a.result.deployment; d != nil {
		if d.RequiresPromotion() {
			d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
		}
	}

	return a.result
}

// cancelDeployments cancels any deployment that is not needed
func (a *allocReconciler) cancelDeployments() {
	// If the job is stopped and there is a non-terminal deployment, cancel it
	if a.job.Stopped() {
		if a.deployment != nil && a.deployment.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
			})
		}

		// Nothing else to do
		a.oldDeployment = a.deployment
		a.deployment = nil
		return
	}

	d := a.deployment
	if d == nil {
		return
	}

	// If the deployment is active but references an older job, cancel it
	if d.JobCreateIndex != a.job.CreateIndex || d.JobVersion != a.job.Version {
		if d.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
			})
		}

		a.oldDeployment = d
		a.deployment = nil
	}

	// Clear it as the current deployment if it is successful
	if d.Status == structs.DeploymentStatusSuccessful {
		a.oldDeployment = d
		a.deployment = nil
	}
}

// handleStop marks all allocations to be stopped, handling the lost case
func (a *allocReconciler) handleStop(m allocMatrix) {
	for group, as := range m {
		untainted, migrate, lost := as.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges := new(structs.DesiredUpdates)
		desiredChanges.Stop = uint64(len(as))
		a.result.desiredTGUpdates[group] = desiredChanges
	}
}

// markStop is a helper for marking a set of allocations for stop with a
// particular client status and description.
func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) {
	for _, alloc := range allocs {
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			clientStatus:      clientStatus,
			statusDescription: statusDescription,
		})
	}
}

// computeGroup reconciles state for a particular task group. It returns whether
// the deployment it is for is complete with regards to the task group.
func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
	// Create the desired update object for the group
	desiredChanges := new(structs.DesiredUpdates)
	a.result.desiredTGUpdates[group] = desiredChanges

	// Get the task group. The task group may be nil if the job was updated
	// such that the task group no longer exists
	tg := a.job.LookupTaskGroup(group)

	// If the task group is nil, then the task group has been removed so all we
	// need to do is stop everything
	if tg == nil {
		untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges.Stop = uint64(len(untainted) + len(migrate) + len(lost))
		return true
	}

	// Get the deployment state for the group
	var dstate *structs.DeploymentState
	existingDeployment := false
	if a.deployment != nil {
		dstate, existingDeployment = a.deployment.TaskGroups[group]
	}
	if !existingDeployment {
		autorevert := false
		if tg.Update != nil && tg.Update.AutoRevert {
			autorevert = true
		}
		dstate = &structs.DeploymentState{
			AutoRevert: autorevert,
		}
	}

	canaries, all := a.handleGroupCanaries(all, desiredChanges)

	// Determine what set of allocations are on tainted nodes
	untainted, migrate, lost := all.filterByTainted(a.taintedNodes)

	// Create a structure for choosing names. Seed with the taken names, which
	// is the union of the untainted and migrating allocations (includes
	// canaries)
	nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate))

	// Stop any unneeded allocations and update the untainted set to not
	// include stopped allocations.
	canaryState := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState)
	desiredChanges.Stop += uint64(len(stop))
	untainted = untainted.difference(stop)

	// Having stopped unneeded allocations, append the canaries to the existing
	// set of untainted because they are promoted. This will cause them to be
	// treated like non-canaries
	if !canaryState {
		untainted = untainted.union(canaries)
		nameIndex.Set(canaries)
	}

	// Do inplace upgrades where possible and capture the set of upgrades that
	// need to be done destructively.
	ignore, inplace, destructive := a.computeUpdates(tg, untainted)
	desiredChanges.Ignore += uint64(len(ignore))
	desiredChanges.InPlaceUpdate += uint64(len(inplace))
	if !existingDeployment {
		dstate.DesiredTotal += len(destructive) + len(inplace)
	}

	// Having destructive updates and fewer canaries than desired means we
	// need to create canaries
	numDestructive := len(destructive)
	strategy := tg.Update
	canariesPromoted := dstate != nil && dstate.Promoted
	requireCanary := numDestructive != 0 && strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted
	if requireCanary && !a.deploymentPaused && !a.deploymentFailed {
		number := strategy.Canary - len(canaries)
		number = helper.IntMin(numDestructive, number)
		desiredChanges.Canary += uint64(number)
		if !existingDeployment {
			dstate.DesiredCanaries = strategy.Canary
		}

		for _, name := range nameIndex.NextCanaries(uint(number), canaries, destructive) {
			a.result.place = append(a.result.place, allocPlaceResult{
				name:      name,
				canary:    true,
				taskGroup: tg,
			})
		}
	}
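
	// For example (sketch): with strategy.Canary = 3, one existing canary,
	// and numDestructive = 5, number = min(5, 3-1) = 2, so two canary
	// placements are appended above.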

	// Determine how many we can place
	canaryState = dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	limit := a.computeLimit(tg, untainted, destructive, migrate, canaryState)

	// Place if:
	// * The deployment is not paused or failed
	// * Not placing any canaries
	// * If there are any canaries, they have been promoted
	place := a.computePlacements(tg, nameIndex, untainted, migrate)
	if !existingDeployment {
		dstate.DesiredTotal += len(place)
	}

	// deploymentPlaceReady tracks whether the deployment is in a state where
	// placements can be made without any other consideration.
	deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState

	if deploymentPlaceReady {
		desiredChanges.Place += uint64(len(place))
		for _, p := range place {
			a.result.place = append(a.result.place, p)
		}

		min := helper.IntMin(len(place), limit)
		limit -= min
	} else if !deploymentPlaceReady && len(lost) != 0 {
		// We are in a situation where we shouldn't be placing more than we
		// need to but we have lost allocations. It is a very weird user
		// experience if you have a node go down and Nomad doesn't replace the
		// allocations because the deployment is paused/failed, so we only
		// place to recover the lost allocations.
		allowed := helper.IntMin(len(lost), len(place))
		desiredChanges.Place += uint64(allowed)
		for _, p := range place[:allowed] {
			a.result.place = append(a.result.place, p)
		}
	}

	if deploymentPlaceReady {
		// Do all destructive updates
		min := helper.IntMin(len(destructive), limit)
		limit -= min
		desiredChanges.DestructiveUpdate += uint64(min)
		desiredChanges.Ignore += uint64(len(destructive) - min)
		for _, alloc := range destructive.nameOrder()[:min] {
			a.result.destructiveUpdate = append(a.result.destructiveUpdate, allocDestructiveResult{
				placeName:             alloc.Name,
				placeTaskGroup:        tg,
				stopAlloc:             alloc,
				stopStatusDescription: allocUpdating,
			})
		}
	} else {
		desiredChanges.Ignore += uint64(len(destructive))
	}

	// Calculate the allowed number of changes and set the desired changes
	// accordingly.
	min := helper.IntMin(len(migrate), limit)
	if !a.deploymentFailed && !a.deploymentPaused {
		desiredChanges.Migrate += uint64(min)
		desiredChanges.Ignore += uint64(len(migrate) - min)
	} else {
		desiredChanges.Stop += uint64(len(migrate))
	}

	followup := false
	migrated := 0
	for _, alloc := range migrate.nameOrder() {
		// If the deployment is failed or paused, don't replace the
		// allocation, just mark it as stopped.
		if a.deploymentFailed || a.deploymentPaused {
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNodeTainted,
			})
			continue
		}

		if migrated >= limit {
			followup = true
			break
		}

		migrated++
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			statusDescription: allocMigrating,
		})
		a.result.place = append(a.result.place, allocPlaceResult{
			name:          alloc.Name,
			canary:        false,
			taskGroup:     tg,
			previousAlloc: alloc,
		})
	}

	// Create a followup evaluation if we hit the migration limit
	if followup && strategy != nil && a.result.followupEvalWait < strategy.Stagger {
		a.result.followupEvalWait = strategy.Stagger
	}

	// Create a new deployment if necessary
	if !existingDeployment && strategy != nil && dstate.DesiredTotal != 0 {
		// A previous group may have made the deployment already
		if a.deployment == nil {
			a.deployment = structs.NewDeployment(a.job)
			a.result.deployment = a.deployment
		}

		// Attach the group's deployment state to the deployment
		a.deployment.TaskGroups[group] = dstate
	}

	// deploymentComplete is whether the deployment is complete, which largely
	// means that no placements were made or desired to be made
	deploymentComplete := len(destructive)+len(inplace)+len(place)+len(migrate) == 0 && !requireCanary

	// The final check for deployment completeness is to ensure everything is
	// healthy
	if deploymentComplete && a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			if !alloc.DeploymentStatus.IsHealthy() {
				deploymentComplete = false
				break
			}
		}
	}

	return deploymentComplete
}

// handleGroupCanaries handles the canaries for the group by stopping the
// unneeded ones and returning the current set of canaries and the updated
// total set of allocs for the group
func (a *allocReconciler) handleGroupCanaries(all allocSet, desiredChanges *structs.DesiredUpdates) (canaries, newAll allocSet) {
	// Stop any canary from an older deployment or from a failed one
	var stop []string

	// Cancel any non-promoted canaries from the older deployment
	if a.oldDeployment != nil {
		for _, s := range a.oldDeployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// Cancel any non-promoted canaries from a failed deployment
	if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusFailed {
		for _, s := range a.deployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// stopSet is the allocSet that contains the canaries we desire to stop
	// from above.
	stopSet := all.fromKeys(stop)
	a.markStop(stopSet, "", allocNotNeeded)
	desiredChanges.Stop += uint64(len(stopSet))
	all = all.difference(stopSet)

	// Capture our current set of canaries and handle any migrations that are
	// needed by just stopping them.
	if a.deployment != nil {
		var canaryIDs []string
		for _, s := range a.deployment.TaskGroups {
			canaryIDs = append(canaryIDs, s.PlacedCanaries...)
		}

		canaries = all.fromKeys(canaryIDs)
		untainted, migrate, lost := canaries.filterByTainted(a.taintedNodes)
		a.markStop(migrate, "", allocMigrating)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)

		canaries = untainted
		all = all.difference(migrate, lost)
	}

	return canaries, all
}

// computeLimit returns the placement limit for a particular group. The inputs
// are the group definition, the untainted, destructive, and migrate allocation
// sets, and whether we are in a canary state.
func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, destructive, migrate allocSet, canaryState bool) int {
	// If there is no update strategy or deployment for the group we can deploy
	// as many as the group has
	if group.Update == nil || len(destructive)+len(migrate) == 0 {
		return group.Count
	} else if a.deploymentPaused || a.deploymentFailed {
		// If the deployment is paused or failed, do not create anything else
		return 0
	}

	// If we have canaries and they have not been promoted the limit is 0
	if canaryState {
		return 0
	}

	// If we have been promoted or there are no canaries, the limit is the
	// configured MaxParallel minus any outstanding non-healthy allocs for the
	// deployment
	limit := group.Update.MaxParallel
	if a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			// An unhealthy allocation means nothing else should happen.
			if alloc.DeploymentStatus.IsUnhealthy() {
				return 0
			}

			if !alloc.DeploymentStatus.IsHealthy() {
				limit--
			}
		}
	}

	// The limit can be less than zero in the case that the job was changed
	// such that it required destructive changes and the count was scaled up.
	if limit < 0 {
		return 0
	}

	return limit
}
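
// For example (sketch): with MaxParallel = 2 and one allocation in the
// current deployment that is placed but not yet marked healthy, computeLimit
// returns 2 - 1 = 1; a single unhealthy allocation drops the limit to 0.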

// computePlacements returns the set of allocations to place given the group
// definition and the sets of untainted and migrating allocations for the
// group.
func (a *allocReconciler) computePlacements(group *structs.TaskGroup,
	nameIndex *allocNameIndex, untainted, migrate allocSet) []allocPlaceResult {

	// Hot path the nothing to do case
	existing := len(untainted) + len(migrate)
	if existing >= group.Count {
		return nil
	}

	var place []allocPlaceResult
	for _, name := range nameIndex.Next(uint(group.Count - existing)) {
		place = append(place, allocPlaceResult{
			name:      name,
			taskGroup: group,
		})
	}

	return place
}
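
// For example (sketch): with group.Count = 5, three untainted allocs, and no
// migrations, existing = 3 and computePlacements asks nameIndex.Next(2) for
// the two missing names. This is the case from the PR description: the two
// placements are created at the new job version without stopping anything.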

// computeStop returns the set of allocations that are marked for stopping
// given the group definition, the sets of allocations in various states, and
// whether we are canarying.
func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex,
	untainted, migrate, lost, canaries allocSet, canaryState bool) allocSet {

	// Mark all lost allocations for stop. The previous allocation doesn't
	// matter here since it is on a lost node
	var stop allocSet
	stop = stop.union(lost)
	a.markStop(lost, structs.AllocClientStatusLost, allocLost)

	// If we are still deploying or creating canaries, don't stop them
	if canaryState {
		untainted = untainted.difference(canaries)
	}

	// Hot path the nothing to do case
	remove := len(untainted) + len(migrate) - group.Count
	if remove <= 0 {
		return stop
	}

	// Prefer stopping any alloc that has the same name as the canaries if we
	// are promoted
	if !canaryState && len(canaries) != 0 {
		canaryNames := canaries.nameSet()
		for id, alloc := range untainted.difference(canaries) {
			if _, match := canaryNames[alloc.Name]; match {
				stop[id] = alloc
				a.result.stop = append(a.result.stop, allocStopResult{
					alloc:             alloc,
					statusDescription: allocNotNeeded,
				})
				delete(untainted, id)

				remove--
				if remove == 0 {
					return stop
				}
			}
		}
	}

	// Prefer selecting from the migrating set before stopping existing allocs
	if len(migrate) != 0 {
		mNames := newAllocNameIndex(a.jobID, group.Name, group.Count, migrate)
		removeNames := mNames.Highest(uint(remove))
		for id, alloc := range migrate {
			if _, match := removeNames[alloc.Name]; !match {
				continue
			}
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
			delete(migrate, id)
			stop[id] = alloc
			nameIndex.UnsetIndex(alloc.Index())

			remove--
			if remove == 0 {
				return stop
			}
		}
	}

	// Select the allocs with the highest name index to remove
	removeNames := nameIndex.Highest(uint(remove))
	for id, alloc := range untainted {
		if _, remove := removeNames[alloc.Name]; remove {
			stop[id] = alloc
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
		}
	}

	return stop
}
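
// For example (sketch): lowering group.Count from 5 to 3 with five untainted
// allocs and nothing migrating gives remove = 2, so the two highest-indexed
// names from nameIndex.Highest(2) are marked for stop.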

// computeUpdates determines which allocations for the passed group require
// updates. Three groups are returned:
// 1. Those that require no upgrades
// 2. Those that can be upgraded in-place. These are added to the results
//    automatically since the function contains the correct state to do so.
// 3. Those that require destructive updates
func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) {
	// Determine the set of allocations that need to be updated
	ignore = make(map[string]*structs.Allocation)
	inplace = make(map[string]*structs.Allocation)
	destructive = make(map[string]*structs.Allocation)

	for _, alloc := range untainted {
		ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.job, group)
		if ignoreChange {
			ignore[alloc.ID] = alloc
		} else if destructiveChange {
			destructive[alloc.ID] = alloc
		} else {
			// Attach the deployment ID and clear the health if the
			// deployment has changed
			inplace[alloc.ID] = alloc
			a.result.inplaceUpdate = append(a.result.inplaceUpdate, inplaceAlloc)
		}
	}

	return
}