open-nomad/scheduler/system_sched.go

package scheduler

import (
	"fmt"
	"log"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// maxSystemScheduleAttempts is used to limit the number of times
	// we will attempt to schedule if we continue to hit conflicts for system
	// jobs.
	maxSystemScheduleAttempts = 5

	// allocNodeTainted is the status used when stopping an alloc because its
	// node is tainted.
	allocNodeTainted = "alloc not needed as node is tainted"
)

// SystemScheduler is used for 'system' jobs. This scheduler is
// designed for services that should be run on every client.
type SystemScheduler struct {
	// Collaborators supplied to NewSystemScheduler. state is swapped for the
	// refreshed state whenever a plan submission hands one back.
	logger  *log.Logger
	state   State
	planner Planner

	// Working set for the evaluation being handled. eval is stored by
	// Process; job, plan, ctx and stack are rebuilt on every attempt made by
	// process, while nodes and nodesByDC are refreshed from readyNodesInDCs
	// only when the job is not stopped. planResult keeps the result of the
	// most recent plan submission and is consulted to decide whether an
	// attempt made progress.
	eval       *structs.Evaluation
	job        *structs.Job
	plan       *structs.Plan
	planResult *structs.PlanResult
	ctx        *EvalContext
	stack      *SystemStack
	nodes      []*structs.Node
	nodesByDC  map[string]int

	// limitReached is the value returned by evictAndPlace for the current
	// attempt. nextEval is the follow-up rolling evaluation, created at most
	// once when limitReached is set.
	limitReached bool
	nextEval     *structs.Evaluation

	// Both maps are keyed by task group name. failedTGAllocs holds the
	// metrics of a failed placement and is reset at the start of each
	// attempt; queuedAllocs counts allocations still waiting to be placed.
	failedTGAllocs map[string]*structs.AllocMetric
	queuedAllocs   map[string]int
}

// NewSystemScheduler builds a system scheduler wired to the given logger,
// state and planner. All remaining fields start at their zero values and are
// populated while an evaluation is processed.
func NewSystemScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
	sched := new(SystemScheduler)
	sched.logger = logger
	sched.state = state
	sched.planner = planner
	return sched
}

// Process handles a single evaluation: it validates the trigger reason, runs
// the scheduling attempts with retries, and records the resulting evaluation
// status through setStatus.
func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
	// Remember the evaluation for the rest of the run
	s.eval = eval

	// finish reports the final status of the evaluation. The scheduler fields
	// are read when it is invoked, so it always sees their latest values.
	finish := func(status, desc string) error {
		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, status, desc,
			s.queuedAllocs, "")
	}

	// Reject evaluations whose trigger reason this scheduler does not handle
	switch eval.TriggeredBy {
	case structs.EvalTriggerJobRegister,
		structs.EvalTriggerNodeUpdate,
		structs.EvalTriggerJobDeregister,
		structs.EvalTriggerRollingUpdate,
		structs.EvalTriggerDeploymentWatcher,
		structs.EvalTriggerNodeDrain:
		// Supported trigger, carry on
	default:
		return finish(structs.EvalStatusFailed,
			fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy))
	}

	// Run the attempts, allowing up to maxSystemScheduleAttempts and
	// resetting the count whenever the last plan made progress.
	madeProgress := func() bool { return progressMade(s.planResult) }
	err := retryMax(maxSystemScheduleAttempts, s.process, madeProgress)
	if err == nil {
		// Everything went through, mark the evaluation complete
		return finish(structs.EvalStatusComplete, "")
	}

	// A SetStatusError carries the status the evaluation should end up in
	if statusErr, ok := err.(*SetStatusError); ok {
		return finish(statusErr.EvalStatus, err.Error())
	}
	return err
}

// process performs one scheduling attempt. It is driven by retryMax, which
// keeps calling it until it reports true (nothing further to do) or the
// attempt budget runs out. A false result with a nil error asks for another
// attempt.
func (s *SystemScheduler) process() (bool, error) {
	// Resolve the job the evaluation refers to
	job, err := s.state.JobByID(memdb.NewWatchSet(), s.eval.Namespace, s.eval.JobID)
	s.job = job
	if err != nil {
		return false, fmt.Errorf("failed to get job '%s': %v",
			s.eval.JobID, err)
	}

	// The stopped check guards several steps below, so evaluate it once
	active := !s.job.Stopped()

	// Start each attempt with a fresh queued-allocation count
	tgCount := 0
	if active {
		tgCount = len(s.job.TaskGroups)
	}
	s.queuedAllocs = make(map[string]int, tgCount)

	// Only a job that is not stopped needs the ready nodes of its datacenters
	if active {
		s.nodes, s.nodesByDC, err = readyNodesInDCs(s.state, s.job.Datacenters)
		if err != nil {
			return false, fmt.Errorf("failed to get ready nodes: %v", err)
		}
	}

	// Fresh plan, cleared failure metrics, new context and placement stack
	s.plan = s.eval.MakePlan(s.job)
	s.failedTGAllocs = nil
	s.ctx = NewEvalContext(s.state, s.plan, s.logger)
	s.stack = NewSystemStack(s.ctx)
	if active {
		s.stack.SetJob(s.job)
	}

	// Work out which allocations have to change
	if allocErr := s.computeJobAllocs(); allocErr != nil {
		s.logger.Printf("[ERR] sched: %#v: %v", s.eval, allocErr)
		return false, allocErr
	}

	// A no-op plan needs no submission, unless annotations were requested, in
	// which case it is submitted anyway to obtain them.
	if !s.eval.AnnotatePlan && s.plan.IsNoOp() {
		return true, nil
	}

	// When the placement limit was hit, schedule a follow-up evaluation after
	// the stagger period. This happens at most once across attempts.
	if s.nextEval == nil && s.limitReached {
		s.nextEval = s.eval.NextRollingEval(s.job.Update.Stagger)
		if createErr := s.planner.CreateEval(s.nextEval); createErr != nil {
			s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling update: %v", s.eval, createErr)
			return false, createErr
		}
		s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)
	}

	// Hand the plan to the planner; the result is kept even on error
	result, refreshed, err := s.planner.SubmitPlan(s.plan)
	s.planResult = result
	if err != nil {
		return false, err
	}

	// Lower the queued counts by what the plan result reports as placed
	adjustQueuedAllocations(s.logger, result, s.queuedAllocs)

	// A refreshed state means our data was stale, so go around again
	if refreshed != nil {
		s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)
		s.state = refreshed
		return false, nil
	}

	// Done when the whole plan was committed
	committed, expected, actual := result.FullCommit(s.plan)
	if committed {
		return true, nil
	}

	// Partial commit, possibly a conflict: retry
	s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",
		s.eval, expected, actual)
	return false, nil
}

// computeJobAllocs compares the job against its existing allocations and the
// status of their nodes, then records the required stops, lost transitions,
// updates and placements in the plan.
func (s *SystemScheduler) computeJobAllocs() error {
	// Fetch every allocation recorded for the job
	jobAllocs, err := s.state.AllocsByJob(memdb.NewWatchSet(), s.eval.Namespace, s.eval.JobID, true)
	if err != nil {
		return fmt.Errorf("failed to get allocs for job '%s': %v",
			s.eval.JobID, err)
	}

	// Find the tainted nodes hosting any of those allocations
	tainted, err := taintedNodes(s.state, jobAllocs)
	if err != nil {
		return fmt.Errorf("failed to get tainted nodes for job '%s': %v",
			s.eval.JobID, err)
	}

	// Pending/running allocations on tainted nodes are marked lost in the
	// plan before the terminal ones are separated out.
	updateNonTerminalAllocsToLost(s.plan, tainted, jobAllocs)
	liveAllocs, terminalAllocs := structs.FilterTerminalAllocs(jobAllocs)

	// Work out what differs between the desired and the existing state
	changes := diffSystemAllocs(s.job, s.nodes, tainted, liveAllocs, terminalAllocs)
	s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, changes)

	// Allocations that are no longer needed get stopped
	for _, tuple := range changes.stop {
		s.plan.AppendUpdate(tuple.Alloc, structs.AllocDesiredStatusStop, allocNotNeeded, "")
	}

	// Lost allocations move to desired status stop and client status lost
	for _, tuple := range changes.lost {
		s.plan.AppendUpdate(tuple.Alloc, structs.AllocDesiredStatusStop, allocLost, structs.AllocClientStatusLost)
	}

	// Try in-place upgrades first; whatever is left is destructive
	destructive, inplace := inplaceUpdate(s.ctx, s.eval, s.job, s.stack, changes.update)
	changes.update = destructive

	if s.eval.AnnotatePlan {
		s.plan.Annotations = &structs.PlanAnnotations{
			DesiredTGUpdates: desiredUpdates(changes, inplace, destructive),
		}
	}

	// With a rolling update strategy the number of destructive updates per
	// pass is capped at MaxParallel; otherwise all of them are allowed.
	limit := len(changes.update)
	if !s.job.Stopped() && s.job.Update.Rolling() {
		limit = s.job.Update.MaxParallel
	}

	// Destructive updates become an eviction plus a new placement
	s.limitReached = evictAndPlace(s.ctx, changes, changes.update, allocUpdating, &limit)

	if len(changes.place) > 0 {
		// Count what has to be placed per task group, then place it
		for _, tuple := range changes.place {
			s.queuedAllocs[tuple.TaskGroup.Name]++
		}
		return s.computePlacements(changes.place)
	}

	// No placements needed: report zero queued for every task group
	if !s.job.Stopped() {
		for _, tg := range s.job.TaskGroups {
			s.queuedAllocs[tg.Name] = 0
		}
	}
	return nil
}

// computePlacements tries to place every tuple in place on the node named by
// the tuple's allocation. Successful placements are appended to the plan;
// failures are recorded per task group in s.failedTGAllocs, with repeated
// failures of the same task group coalesced into a counter.
//
// It returns an error when a tuple carries no allocation (and therefore no
// target node) or when the target node is not among s.nodes.
func (s *SystemScheduler) computePlacements(place []allocTuple) error {
	nodeByID := make(map[string]*structs.Node, len(s.nodes))
	for _, node := range s.nodes {
		nodeByID[node.ID] = node
	}

	// One-element candidate list, reused for every placement
	nodes := make([]*structs.Node, 1)
	for _, missing := range place {
		// The target node is taken from the tuple's allocation. Report a
		// missing allocation as an error rather than dereferencing nil.
		if missing.Alloc == nil {
			return fmt.Errorf("placement %q has no target node", missing.Name)
		}

		node, ok := nodeByID[missing.Alloc.NodeID]
		if !ok {
			return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
		}

		// Update the set of placement nodes
		nodes[0] = node
		s.stack.SetNodes(nodes)

		// Attempt to match the task group
		option, _ := s.stack.Select(missing.TaskGroup, nil)

		if option == nil {
			// If nodes were filtered because of constraint mismatches and we
			// couldn't create an allocation then decrement queued for that
			// task group
			if s.ctx.Metrics().NodesFiltered > 0 {
				s.queuedAllocs[missing.TaskGroup.Name] -= 1

				// If we are annotating the plan, then decrement the desired
				// placements based on whether the node meets the constraints.
				// Skip task groups without an annotation entry and never
				// drive the count below zero.
				if s.eval.AnnotatePlan && s.plan.Annotations != nil &&
					s.plan.Annotations.DesiredTGUpdates != nil {
					desired := s.plan.Annotations.DesiredTGUpdates[missing.TaskGroup.Name]
					if desired != nil && desired.Place > 0 {
						desired.Place -= 1
					}
				}
			}

			// A task group that already failed only gets its failure
			// counter bumped; the first failure's metrics are kept.
			if metric, ok := s.failedTGAllocs[missing.TaskGroup.Name]; ok {
				metric.CoalescedFailures += 1
				continue
			}
		}

		// Store the available nodes by datacenter
		s.ctx.Metrics().NodesAvailable = s.nodesByDC

		if option == nil {
			// Lazy initialize the failed map
			if s.failedTGAllocs == nil {
				s.failedTGAllocs = make(map[string]*structs.AllocMetric)
			}

			s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
			continue
		}

		// Create an allocation for this placement. PreviousAllocation chains
		// it to the allocation carried by the tuple, which the guard at the
		// top of the loop guarantees is present.
		alloc := &structs.Allocation{
			ID:                 uuid.Generate(),
			Namespace:          s.job.Namespace,
			EvalID:             s.eval.ID,
			Name:               missing.Name,
			JobID:              s.job.ID,
			TaskGroup:          missing.TaskGroup.Name,
			Metrics:            s.ctx.Metrics(),
			NodeID:             option.Node.ID,
			TaskResources:      option.TaskResources,
			DesiredStatus:      structs.AllocDesiredStatusRun,
			ClientStatus:       structs.AllocClientStatusPending,
			PreviousAllocation: missing.Alloc.ID,

			SharedResources: &structs.Resources{
				DiskMB: missing.TaskGroup.EphemeralDisk.SizeMB,
			},
		}

		s.plan.AppendAlloc(alloc)
	}

	return nil
}
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`package scheduler`

			`import (`
			`"fmt"`
			`"log"`

Nomad builds 2017-02-08 04:31:23 +00:00			`memdb "github.com/hashicorp/go-memdb"`
Remove `structs` import from `api` Goes a step further and removes structs import from api's tests as well by moving GenerateUUID to its own package. 2017-09-29 16:58:48 +00:00			`"github.com/hashicorp/nomad/helper/uuid"`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`"github.com/hashicorp/nomad/nomad/structs"`
			`)`

			`const (`
			`// maxSystemScheduleAttempts is used to limit the number of times`
			`// we will attempt to schedule if we continue to hit conflicts for system`
			`// jobs.`
Add negative test to DriverIterator, increase system scheduler attempts, and fix evictAndPlace status message 2015-10-16 18:36:26 +00:00			`maxSystemScheduleAttempts = 5`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
			`// allocNodeTainted is the status used when stopping an alloc because it's`
			`// node is tainted.`
Rolling node drains using max_parallel and stagger This PR adds rolling node drains done at max_parallel and stagger of the update spec. It brings it inline with old behavior. 2017-07-07 18:42:51 +00:00			`allocNodeTainted = "alloc not needed as node is tainted"`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`)`

			`// SystemScheduler is used for 'system' jobs. This scheduler is`
			`// designed for services that should be run on every client.`
			`type SystemScheduler struct {`
			`logger *log.Logger`
			`state State`
			`planner Planner`

Reset retry count if progress is made and fail by creating a blocked eval 2016-02-10 05:24:47 +00:00			`eval *structs.Evaluation`
			`job *structs.Job`
			`plan *structs.Plan`
			`planResult *structs.PlanResult`
			`ctx *EvalContext`
			`stack *SystemStack`
			`nodes []*structs.Node`
			`nodesByDC map[string]int`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
			`limitReached bool`
			`nextEval *structs.Evaluation`
track failed allocations properly 2016-06-15 19:58:19 +00:00
			`failedTGAllocs map[string]*structs.AllocMetric`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`queuedAllocs map[string]int`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`

			`// NewSystemScheduler is a factory function to instantiate a new system`
			`// scheduler.`
			`func NewSystemScheduler(logger *log.Logger, state State, planner Planner) Scheduler {`
			`return &SystemScheduler{`
Initializing the queued allocations late 2016-07-22 23:48:42 +00:00			`logger: logger,`
			`state: state,`
			`planner: planner,`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`
			`}`

			`// Process is used to handle a single evaluation.`
			`func (s SystemScheduler) Process(eval structs.Evaluation) error {`
			`// Store the evaluation`
			`s.eval = eval`

			`// Verify the evaluation trigger reason is understood`
			`switch eval.TriggeredBy {`
			`case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,`
Client watches for allocation health using task state and Consul checks This PR adds watching of allocation health at the client. The client can watch for health based on the tasks running on time and also based on the consul checks passing. 2017-07-04 19:24:27 +00:00			`structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate,`
RPC, FSM, State Store for marking DesiredTransistion fix build tag 2018-02-21 18:58:04 +00:00			`structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain:`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`default:`
			`desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",`
			`eval.TriggeredBy)`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusFailed, desc,`
Attach eval id 2017-07-06 00:13:45 +00:00			`s.queuedAllocs, "")`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`

Reset retry count if progress is made and fail by creating a blocked eval 2016-02-10 05:24:47 +00:00			`// Retry up to the maxSystemScheduleAttempts and reset if progress is made.`
			`progress := func() bool { return progressMade(s.planResult) }`
			`if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if statusErr, ok := err.(*SetStatusError); ok {`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, statusErr.EvalStatus, err.Error(),`
Attach eval id 2017-07-06 00:13:45 +00:00			`s.queuedAllocs, "")`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`
			`return err`
			`}`

			`// Update the status to complete`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, structs.EvalStatusComplete, "",`
Attach eval id 2017-07-06 00:13:45 +00:00			`s.queuedAllocs, "")`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`

			`// process is wrapped in retryMax to iteratively run the handler until we have no`
			`// further work or we've made the maximum number of attempts.`
			`func (s *SystemScheduler) process() (bool, error) {`
			`// Lookup the Job by ID`
			`var err error`
Nomad builds 2017-02-08 04:31:23 +00:00			`ws := memdb.NewWatchSet()`
Sync namespace changes 2017-09-07 23:56:15 +00:00			`s.job, err = s.state.JobByID(ws, s.eval.Namespace, s.eval.JobID)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if err != nil {`
			`return false, fmt.Errorf("failed to get job '%s': %v",`
			`s.eval.JobID, err)`
			`}`
Making the queued allocations bind late 2016-07-26 05:11:11 +00:00			`numTaskGroups := 0`
Respond to review comments 2017-04-19 17:54:03 +00:00			`if !s.job.Stopped() {`
Making the queued allocations bind late 2016-07-26 05:11:11 +00:00			`numTaskGroups = len(s.job.TaskGroups)`
			`}`
			`s.queuedAllocs = make(map[string]int, numTaskGroups)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
			`// Get the ready nodes in the required datacenters`
Respond to review comments 2017-04-19 17:54:03 +00:00			`if !s.job.Stopped() {`
Fix bug, add tests, and cli output 2016-01-04 22:23:06 +00:00			`s.nodes, s.nodesByDC, err = readyNodesInDCs(s.state, s.job.Datacenters)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if err != nil {`
			`return false, fmt.Errorf("failed to get ready nodes: %v", err)`
			`}`
			`}`

			`// Create a plan`
			`s.plan = s.eval.MakePlan(s.job)`

Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`// Reset the failed allocations`
track failed allocations properly 2016-06-15 19:58:19 +00:00			`s.failedTGAllocs = nil`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// Create an evaluation context`
			`s.ctx = NewEvalContext(s.state, s.plan, s.logger)`

			`// Construct the placement stack`
Remove base nodes from stack constructors 2015-10-17 00:05:23 +00:00			`s.stack = NewSystemStack(s.ctx)`
Respond to review comments 2017-04-19 17:54:03 +00:00			`if !s.job.Stopped() {`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`s.stack.SetJob(s.job)`
			`}`

			`// Compute the target job allocations`
			`if err := s.computeJobAllocs(); err != nil {`
			`s.logger.Printf("[ERR] sched: %#v: %v", s.eval, err)`
			`return false, err`
			`}`

Initial plan endpoint implementation - WIP 2016-05-05 18:21:58 +00:00			`// If the plan is a no-op, we can bail. If AnnotatePlan is set submit the plan`
			`// anyways to get the annotations.`
			`if s.plan.IsNoOp() && !s.eval.AnnotatePlan {`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`return true, nil`
			`}`

			`// If the limit of placements was reached we need to create an evaluation`
			`// to pickup from here after the stagger period.`
			`if s.limitReached && s.nextEval == nil {`
			`s.nextEval = s.eval.NextRollingEval(s.job.Update.Stagger)`
			`if err := s.planner.CreateEval(s.nextEval); err != nil {`
			`s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling update: %v", s.eval, err)`
			`return false, err`
			`}`
			`s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)`
			`}`

			`// Submit the plan`
			`result, newState, err := s.planner.SubmitPlan(s.plan)`
Reset retry count if progress is made and fail by creating a blocked eval 2016-02-10 05:24:47 +00:00			`s.planResult = result`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if err != nil {`
			`return false, err`
			`}`

Review comments 2016-07-22 06:13:07 +00:00			`// Decrement the number of allocations pending per task group based on the`
			`// number of allocations successfully placed`
Fixed some more tests 2016-07-22 21:53:49 +00:00			`adjustQueuedAllocations(s.logger, result, s.queuedAllocs)`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// If we got a state refresh, try again since we have stale data`
			`if newState != nil {`
			`s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)`
			`s.state = newState`
			`return false, nil`
			`}`

			`// Try again if the plan was not fully committed, potential conflict`
			`fullCommit, expected, actual := result.FullCommit(s.plan)`
			`if !fullCommit {`
			`s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",`
			`s.eval, expected, actual)`
			`return false, nil`
			`}`

			`// Success!`
			`return true, nil`
			`}`

			`// computeJobAllocs is used to reconcile differences between the job,`
			`// existing allocations and node status to update the allocations.`
			`func (s *SystemScheduler) computeJobAllocs() error {`
			`// Lookup the allocations by JobID`
Nomad builds 2017-02-08 04:31:23 +00:00			`ws := memdb.NewWatchSet()`
Sync namespace changes 2017-09-07 23:56:15 +00:00			`allocs, err := s.state.AllocsByJob(ws, s.eval.Namespace, s.eval.JobID, true)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if err != nil {`
			`return fmt.Errorf("failed to get allocs for job '%s': %v",`
			`s.eval.JobID, err)`
			`}`

			`// Determine the tainted nodes containing job allocs`
			`tainted, err := taintedNodes(s.state, allocs)`
			`if err != nil {`
			`return fmt.Errorf("failed to get tainted nodes for job '%s': %v",`
			`s.eval.JobID, err)`
			`}`

Added scheduler tests 2016-08-09 21:48:25 +00:00			`// Update the allocations which are in pending/running state on tainted`
			`// nodes to lost`
Marking allocations which are not terminal and are on down nodes as lost 2016-08-09 20:11:58 +00:00			`updateNonTerminalAllocsToLost(s.plan, tainted, allocs)`

			`// Filter out the allocations in a terminal state`
Implemented SetPrefferingNodes in stack 2016-08-30 22:36:30 +00:00			`allocs, terminalAllocs := structs.FilterTerminalAllocs(allocs)`
Marking allocations which are not terminal and are on down nodes as lost 2016-08-09 20:11:58 +00:00
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// Diff the required and existing allocations`
Implemented SetPrefferingNodes in stack 2016-08-30 22:36:30 +00:00			`diff := diffSystemAllocs(s.job, s.nodes, tainted, allocs, terminalAllocs)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, diff)`

			`// Add all the allocs to stop`
			`for _, e := range diff.stop {`
Make scheduler mark allocations as lost 2016-08-03 22:45:42 +00:00			`s.plan.AppendUpdate(e.Alloc, structs.AllocDesiredStatusStop, allocNotNeeded, "")`
			`}`

Fix some typos 2017-12-13 17:36:03 +00:00			`// Lost allocations should be transitioned to desired status stop and client`
Make scheduler mark allocations as lost 2016-08-03 22:45:42 +00:00			`// status lost.`
			`for _, e := range diff.lost {`
			`s.plan.AppendUpdate(e.Alloc, structs.AllocDesiredStatusStop, allocLost, structs.AllocClientStatusLost)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`

			`// Attempt to do the upgrades in place`
inplaceUpdate returns the allocs that were updated in-place 2016-05-17 22:37:37 +00:00			`destructiveUpdates, inplaceUpdates := inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)`
Initial plan endpoint implementation - WIP 2016-05-05 18:21:58 +00:00			`diff.update = destructiveUpdates`

			`if s.eval.AnnotatePlan {`
			`s.plan.Annotations = &structs.PlanAnnotations{`
			`DesiredTGUpdates: desiredUpdates(diff, inplaceUpdates, destructiveUpdates),`
			`}`
			`}`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
			`// Check if a rolling upgrade strategy is being used`
			`limit := len(diff.update)`
Respond to review comments 2017-04-19 17:54:03 +00:00			`if !s.job.Stopped() && s.job.Update.Rolling() {`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`limit = s.job.Update.MaxParallel`
			`}`

			`// Treat non in-place updates as an eviction and new placement.`
Refactor shared code between schedulers 2015-10-15 00:26:20 +00:00			`s.limitReached = evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
			`// Nothing remaining to do if placement is not required`
			`if len(diff.place) == 0 {`
Respond to review comments 2017-04-19 17:54:03 +00:00			`if !s.job.Stopped() {`
Updated tests and added logic to system sched 2016-07-28 21:02:50 +00:00			`for _, tg := range s.job.TaskGroups {`
			`s.queuedAllocs[tg.Name] = 0`
			`}`
			`}`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`return nil`
			`}`

Review comments 2016-07-22 06:13:07 +00:00			`// Record the number of allocations that needs to be placed per Task Group`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`for _, allocTuple := range diff.place {`
Review comments 2016-07-22 06:13:07 +00:00			`s.queuedAllocs[allocTuple.TaskGroup.Name] += 1`
Setting the number of queued allocations per task group 2016-07-18 22:04:05 +00:00			`}`

System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// Compute the placements`
			`return s.computePlacements(diff.place)`
			`}`

			`// computePlacements computes placements for allocations`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`func (s *SystemScheduler) computePlacements(place []allocTuple) error {`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`nodeByID := make(map[string]*structs.Node, len(s.nodes))`
			`for _, node := range s.nodes {`
			`nodeByID[node.ID] = node`
			`}`

			`nodes := make([]*structs.Node, 1)`
			`for _, missing := range place {`
Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`node, ok := nodeByID[missing.Alloc.NodeID]`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`if !ok {`
Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`

Store the available nodes in the alloc metric 2016-01-04 20:07:33 +00:00			`// Update the set of placement nodes`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`nodes[0] = node`
			`s.stack.SetNodes(nodes)`

			`// Attempt to match the task group`
Reschedule previous allocs and track their reschedule attempts 2018-01-14 22:47:21 +00:00			`option, _ := s.stack.Select(missing.TaskGroup, nil)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00
rename SpawnedBlockedEval and simplify map safety check 2016-05-25 01:12:59 +00:00			`if option == nil {`
Fix some typos 2017-12-13 17:36:03 +00:00			`// If nodes were filtered because of constraint mismatches and we`
Ensuring system sched doesn't increment queued count when nodes are filtered 2016-08-10 21:30:02 +00:00			`// couldn't create an allocation then decrementing queued for that`
			`// task group`
Fixed the logic of calculating queued allocation in sys sched (#1724) 2016-09-20 19:05:19 +00:00			`if s.ctx.metrics.NodesFiltered > 0 {`
Ensuring system sched doesn't increment queued count when nodes are filtered 2016-08-10 21:30:02 +00:00			`s.queuedAllocs[missing.TaskGroup.Name] -= 1`
Plan on system scheduler doesn't count nodes who don't meet constraints 2016-08-11 22:26:25 +00:00
			`// If we are annotating the plan, then decrement the desired`
			`// placements based on whether the node meets the constraints`
			`if s.eval.AnnotatePlan && s.plan.Annotations != nil &&`
			`s.plan.Annotations.DesiredTGUpdates != nil {`
			`desired := s.plan.Annotations.DesiredTGUpdates[missing.TaskGroup.Name]`
			`desired.Place -= 1`
			`}`
Ensuring system sched doesn't increment queued count when nodes are filtered 2016-08-10 21:30:02 +00:00			`}`
Plan on system scheduler doesn't count nodes who don't meet constraints 2016-08-11 22:26:25 +00:00
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// Check if this task group has already failed`
track failed allocations properly 2016-06-15 19:58:19 +00:00			`if metric, ok := s.failedTGAllocs[missing.TaskGroup.Name]; ok {`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`metric.CoalescedFailures += 1`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`continue`
			`}`
			`}`

Fix bug, add tests, and cli output 2016-01-04 22:23:06 +00:00			`// Store the available nodes by datacenter`
			`s.ctx.Metrics().NodesAvailable = s.nodesByDC`

System scheduler and system stack 2015-10-14 23:43:06 +00:00			`// Set fields based on if we found an allocation option`
			`if option != nil {`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`// Create an allocation for this`
			`alloc := &structs.Allocation{`
Remove `structs` import from `api` Goes a step further and removes structs import from api's tests as well by moving GenerateUUID to its own package. 2017-09-29 16:58:48 +00:00			`ID: uuid.Generate(),`
Sync namespace changes 2017-09-07 23:56:15 +00:00			`Namespace: s.job.Namespace,`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`EvalID: s.eval.ID,`
			`Name: missing.Name,`
			`JobID: s.job.ID,`
			`TaskGroup: missing.TaskGroup.Name,`
			`Metrics: s.ctx.Metrics(),`
			`NodeID: option.Node.ID,`
			`TaskResources: option.TaskResources,`
			`DesiredStatus: structs.AllocDesiredStatusRun,`
			`ClientStatus: structs.AllocClientStatusPending,`
Introducing shared resources in alloc 2016-08-29 19:49:52 +00:00
			`SharedResources: &structs.Resources{`
Renaming LocalDisk to EphemeralDisk (#1710) Renaming LocalDisk to EphemeralDisk 2016-09-14 22:43:42 +00:00			`DiskMB: missing.TaskGroup.EphemeralDisk.SizeMB,`
Introducing shared resources in alloc 2016-08-29 19:49:52 +00:00			`},`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`}`

Added the chained alloc for system scheduler 2016-08-16 17:49:45 +00:00			`// If the new allocation is replacing an older allocation then we`
			`// set the record the older allocation id so that they are chained`
			`if missing.Alloc != nil {`
			`alloc.PreviousAllocation = missing.Alloc.ID`
			`}`

System scheduler and system stack 2015-10-14 23:43:06 +00:00			`s.plan.AppendAlloc(alloc)`
			`} else {`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`// Lazy initialize the failed map`
track failed allocations properly 2016-06-15 19:58:19 +00:00			`if s.failedTGAllocs == nil {`
			`s.failedTGAllocs = make(map[string]*structs.AllocMetric)`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00			`}`

track failed allocations properly 2016-06-15 19:58:19 +00:00			`s.failedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`}`
			`}`
Scheduler no longer produces failed allocations; failed alloc metrics stored in evaluation 2016-05-19 01:11:40 +00:00
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`return nil`
			`}`