open-nomad/scheduler/util.go

package scheduler

import (
	"fmt"
	"log"
	"math/rand"
	"reflect"

	"github.com/hashicorp/nomad/nomad/structs"
)

// allocTuple groups an allocation name with the task group definition it
// maps to and, when there is one, the allocation itself.
//
// Name is the allocation name. TaskGroup is the task group looked up for that
// name; it is nil for allocations whose name is no longer required. Alloc is
// the existing allocation; diffAllocs leaves it unset for tuples that still
// need a placement.
type allocTuple struct {
	Name      string
	TaskGroup *structs.TaskGroup
	Alloc     *structs.Allocation
}

// materializeTaskGroups performs the count expansion for a job: every task
// group contributes one entry per desired instance. Keys have the form
// "<job name>.<group name>[<index>]" and all instances of a group map to that
// group's definition. A nil job yields an empty, non-nil map.
func materializeTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
	groups := make(map[string]*structs.TaskGroup)
	if job != nil {
		for _, group := range job.TaskGroups {
			for idx := 0; idx < group.Count; idx++ {
				groups[fmt.Sprintf("%s.%s[%d]", job.Name, group.Name, idx)] = group
			}
		}
	}
	return groups
}

// diffResult is used to return the sets that result from the diff. Each field
// holds the tuples that fall into one category: place (needs a new
// allocation), update (job definition changed), migrate (running on a tainted
// node), stop (no longer required) and ignore (already up-to-date).
type diffResult struct {
	place, update, migrate, stop, ignore []allocTuple
}

// GoString renders a one-line summary holding the number of entries in each
// of the five sets. It is the representation fmt picks for the %#v verb.
func (d *diffResult) GoString() string {
	const summary = "allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d)"
	placed, updated, migrated := len(d.place), len(d.update), len(d.migrate)
	stopped, ignored := len(d.stop), len(d.ignore)
	return fmt.Sprintf(summary, placed, updated, migrated, stopped, ignored)
}

// Append adds every set of other onto the end of the matching set of d.
func (d *diffResult) Append(other *diffResult) {
	// extend grows the destination set in place with the source tuples.
	extend := func(dst *[]allocTuple, src []allocTuple) {
		*dst = append(*dst, src...)
	}

	extend(&d.place, other.place)
	extend(&d.update, other.update)
	extend(&d.migrate, other.migrate)
	extend(&d.stop, other.stop)
	extend(&d.ignore, other.ignore)
}

// diffAllocs computes the set difference between the required (target)
// allocations and the existing ones. Every existing allocation lands in
// exactly one of four sets, checked in this order:
//
//   - stop:    its name is not in the required set
//   - migrate: its node is marked in taintedNodes
//   - update:  the job's ModifyIndex differs from the one the alloc was made from
//   - ignore:  none of the above, the allocation is up-to-date
//
// Required names that have no existing allocation end up in the place set.
func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
	required map[string]*structs.TaskGroup, allocs []*structs.Allocation) *diffResult {
	out := &diffResult{}

	// Classify the existing allocations, remembering which names are covered.
	seen := make(map[string]struct{})
	for _, current := range allocs {
		seen[current.Name] = struct{}{}

		// group is nil when the name is not part of the required set.
		group, needed := required[current.Name]
		tuple := allocTuple{
			Name:      current.Name,
			TaskGroup: group,
			Alloc:     current,
		}

		switch {
		case !needed:
			// Not required any more, so the alloc is stopped.
			out.stop = append(out.stop, tuple)

		case taintedNodes[current.NodeID]:
			// Sitting on a tainted node, so it has to move.
			out.migrate = append(out.migrate, tuple)

		case job.ModifyIndex != current.Job.ModifyIndex:
			// XXX: This is an extremely conservative approach. We can check
			// if the job definition has changed in a way that affects
			// this allocation and potentially ignore it.
			out.update = append(out.update, tuple)

		default:
			// Nothing changed for this allocation.
			out.ignore = append(out.ignore, tuple)
		}
	}

	// Whatever is required but has no existing allocation needs a placement.
	// Names with an existing allocation were already classified above.
	for name, group := range required {
		if _, covered := seen[name]; covered {
			continue
		}
		out.place = append(out.place, allocTuple{
			Name:      name,
			TaskGroup: group,
		})
	}
	return out
}

// diffSystemAllocs runs diffAllocs once per node and merges the results. In
// contrast to diffAllocs, every tuple in the place set carries an Allocation
// whose NodeID names the node it is meant for, and migrations are reported as
// stops.
func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[string]bool,
	allocs []*structs.Allocation) *diffResult {

	// Group the existing allocations by the node they are on.
	byNode := make(map[string][]*structs.Allocation, len(allocs))
	for _, existing := range allocs {
		byNode[existing.NodeID] = append(byNode[existing.NodeID], existing)
	}

	// Give every supplied node an entry, even when nothing runs on it yet, so
	// that it is diffed below as well.
	for _, node := range nodes {
		if _, known := byNode[node.ID]; known {
			continue
		}
		byNode[node.ID] = nil
	}

	// The same set of task groups is required on each node.
	required := materializeTaskGroups(job)

	combined := &diffResult{}
	for nodeID, onNode := range byNode {
		nodeDiff := diffAllocs(job, taintedNodes, required, onNode)

		// Pin each placement to the node currently being diffed.
		for idx := range nodeDiff.place {
			nodeDiff.place[idx].Alloc = &structs.Allocation{NodeID: nodeID}
		}

		// Migrate does not apply to system jobs and instead should be marked as
		// stop because if a node is tainted, the job is invalid on that node.
		nodeDiff.stop = append(nodeDiff.stop, nodeDiff.migrate...)
		nodeDiff.migrate = nil

		combined.Append(nodeDiff)
	}

	return combined
}

// readyNodesInDCs walks all nodes in the state and returns those that have
// the ready status, are not draining and belong to one of the given
// datacenters. An error from the state lookup is returned as-is.
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
	// Turn the datacenter list into a set for constant-time membership tests.
	wanted := make(map[string]struct{}, len(dcs))
	for _, name := range dcs {
		wanted[name] = struct{}{}
	}

	iter, err := state.Nodes()
	if err != nil {
		return nil, err
	}

	// A nil item signals the end of the iteration.
	var ready []*structs.Node
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		node := raw.(*structs.Node)

		// Drop nodes that are not ready or are being drained.
		if node.Status != structs.NodeStatusReady || node.Drain {
			continue
		}
		if _, inDC := wanted[node.Datacenter]; inDC {
			ready = append(ready, node)
		}
	}
	return ready, nil
}

// retryMax invokes cb up to max times. It stops early and returns cb's error
// as soon as cb fails, or nil as soon as cb reports it is done. When all
// attempts are used up without success a SetStatusError carrying the failed
// evaluation status is returned.
func retryMax(max int, cb func() (bool, error)) error {
	for attempt := 0; attempt < max; attempt++ {
		switch done, err := cb(); {
		case err != nil:
			return err
		case done:
			return nil
		}
	}
	return &SetStatusError{
		Err:        fmt.Errorf("maximum attempts reached (%d)", max),
		EvalStatus: structs.EvalStatusFailed,
	}
}

// taintedNodes looks up the node of every allocation and reports, per node
// ID, whether allocations on it should be forced to migrate. Each node is
// looked up only once. A lookup error aborts the scan and is returned.
func taintedNodes(state State, allocs []*structs.Allocation) (map[string]bool, error) {
	tainted := make(map[string]bool)
	for _, alloc := range allocs {
		nodeID := alloc.NodeID
		if _, checked := tainted[nodeID]; checked {
			continue
		}

		node, err := state.NodeByID(nodeID)
		if err != nil {
			return nil, err
		}

		// A node that no longer exists is tainted; otherwise it is tainted
		// when its status calls for a drain or the drain flag is set.
		tainted[nodeID] = node == nil ||
			structs.ShouldDrainNode(node.Status) ||
			node.Drain
	}
	return tainted, nil
}

// shuffleNodes reorders the slice in place using a Fisher-Yates shuffle:
// walking from the back, each position is swapped with a randomly chosen
// position at or before it.
func shuffleNodes(nodes []*structs.Node) {
	last := len(nodes) - 1
	for last > 0 {
		pick := rand.Intn(last + 1)
		nodes[last], nodes[pick] = nodes[pick], nodes[last]
		last--
	}
}

// tasksUpdated compares two task groups and reports whether their tasks
// differ. A difference is any of: a different number of tasks, a task of a
// that b does not have, a changed driver or config, a different number of
// networks, or a different number of dynamic ports on a network.
func tasksUpdated(a, b *structs.TaskGroup) bool {
	// A different task count is always an update.
	if len(a.Tasks) != len(b.Tasks) {
		return true
	}

	for _, current := range a.Tasks {
		// The task must exist on the other side with the same driver and config.
		other := b.LookupTask(current.Name)
		if other == nil ||
			current.Driver != other.Driver ||
			!reflect.DeepEqual(current.Config, other.Config) {
			return true
		}

		// For the networks only the number of dynamic ports is compared.
		curNets, otherNets := current.Resources.Networks, other.Resources.Networks
		if len(curNets) != len(otherNets) {
			return true
		}
		for pos, network := range curNets {
			if len(network.DynamicPorts) != len(otherNets[pos].DynamicPorts) {
				return true
			}
		}
	}
	return false
}

// setStatus logs the status change and submits a copy of the evaluation
// carrying the new status and description through the planner. When nextEval
// is non-nil, its ID is recorded on the copy as the follow-up evaluation.
func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
	logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)

	// The changes are applied to a copy, not to the evaluation passed in.
	updated := eval.Copy()
	updated.Status, updated.StatusDescription = status, desc
	if nextEval != nil {
		updated.NextEval = nextEval.ID
	}
	return planner.UpdateEval(updated)
}

// inplaceUpdate attempts to update allocations in-place where possible.
//
// For every tuple in updates it checks whether the task group still fits on
// the node the allocation currently runs on. If it does, a replacement
// allocation (a shallow copy of the old one pointing at the new job) is
// appended to the plan and the tuple is removed from updates.
//
// The returned slice holds the tuples that could not be updated in place. It
// shares its backing array with updates, whose element order is changed by
// the removal.
func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
	stack Stack, updates []allocTuple) []allocTuple {

	// n is the length of the still-unhandled prefix of updates; inplace counts
	// the tuples that were handled in place.
	n := len(updates)
	inplace := 0
	for i := 0; i < n; i++ {
		// Get the update
		update := updates[i]

		// Check if the task drivers or config has changed, requires
		// a rolling upgrade since that cannot be done in-place.
		existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
		if tasksUpdated(update.TaskGroup, existing) {
			continue
		}

		// Get the existing node
		node, err := ctx.State().NodeByID(update.Alloc.NodeID)
		if err != nil {
			ctx.Logger().Printf("[ERR] sched: %#v failed to get node '%s': %v",
				eval, update.Alloc.NodeID, err)
			continue
		}
		if node == nil {
			continue
		}

		// Set the existing node as the base set
		stack.SetNodes([]*structs.Node{node})

		// Stage an eviction of the current allocation
		ctx.Plan().AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,
			allocInPlace)

		// Attempt to match the task group
		option, size := stack.Select(update.TaskGroup)

		// Pop the allocation; the eviction was only staged for the selection
		// above and is removed again whether or not the selection succeeded.
		ctx.Plan().PopUpdate(update.Alloc)

		// Skip if we could not do an in-place update
		if option == nil {
			continue
		}

		// Restore the network offers from the existing allocation.
		// We do not allow network resources (reserved/dynamic ports)
		// to be updated. This is guarded in tasksUpdated, so we can
		// safely restore those here.
		// NOTE(review): existing is used without a nil check; this assumes
		// every task in option.TaskResources also has an entry in the old
		// allocation's TaskResources - confirm.
		for task, resources := range option.TaskResources {
			existing := update.Alloc.TaskResources[task]
			resources.Networks = existing.Networks
		}

		// Create a shallow copy
		newAlloc := new(structs.Allocation)
		*newAlloc = *update.Alloc

		// Update the allocation
		newAlloc.EvalID = eval.ID
		newAlloc.Job = job
		newAlloc.Resources = size
		newAlloc.TaskResources = option.TaskResources
		newAlloc.Metrics = ctx.Metrics()
		newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
		newAlloc.ClientStatus = structs.AllocClientStatusPending
		ctx.Plan().AppendAlloc(newAlloc)

		// Remove this allocation from the slice: overwrite it with the last
		// unhandled element, shrink the unhandled prefix, and step i back so
		// the element moved into this slot is examined next.
		updates[i] = updates[n-1]
		i--
		n--
		inplace++
	}
	// len(updates) is still the number of tuples passed in, so the message
	// reports the in-place count against the original total.
	if len(updates) > 0 {
		ctx.Logger().Printf("[DEBUG] sched: %#v: %d in-place updates of %d", eval, inplace, len(updates))
	}
	return updates[:n]
}

// evictAndPlace marks allocations for eviction in the plan and queues them
// for placement in diff. At most *limit allocations are handled, taken from
// the front of allocs. Both the diffResult and the limit are modified: the
// limit is reduced by the number of allocations, or set to zero when there
// were more allocations than the limit allowed. It returns true if the limit
// has been reached.
func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc string, limit *int) bool {
	total := len(allocs)
	for idx, tuple := range allocs {
		if idx >= *limit {
			break
		}
		ctx.Plan().AppendUpdate(tuple.Alloc, structs.AllocDesiredStatusStop, desc)
		diff.place = append(diff.place, tuple)
	}

	// More allocations than the limit permits: the budget is exhausted.
	if total > *limit {
		*limit = 0
		return true
	}

	*limit -= total
	return false
}

// tgConstrainTuple is used to store the total constraints of a task group:
// the aggregated constraints, required drivers and resources of the group and
// its tasks.
type tgConstrainTuple struct {
	// Holds the combined constraints of the task group and all its sub-tasks.
	constraints []*structs.Constraint

	// The set of required drivers within the task group.
	drivers map[string]struct{}

	// The combined resources of all tasks within the task group.
	size *structs.Resources
}

// taskGroupConstraints aggregates the totals for a task group: the group's
// own constraints followed by those of each task, the set of drivers the
// tasks use, and the sum of the tasks' resources.
func taskGroupConstraints(tg *structs.TaskGroup) tgConstrainTuple {
	// Start from the group-level constraints; task constraints follow.
	constraints := make([]*structs.Constraint, 0, len(tg.Constraints))
	constraints = append(constraints, tg.Constraints...)

	drivers := make(map[string]struct{})
	size := new(structs.Resources)

	for _, task := range tg.Tasks {
		drivers[task.Driver] = struct{}{}
		constraints = append(constraints, task.Constraints...)
		size.Add(task.Resources)
	}

	return tgConstrainTuple{
		constraints: constraints,
		drivers:     drivers,
		size:        size,
	}
}
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`package scheduler`

			`import (`
			`"fmt"`
Refactor shared code between schedulers 2015-10-15 00:26:20 +00:00			`"log"`
scheduler: pull node shuffle into util 2015-09-07 18:23:38 +00:00			`"math/rand"`
scheduler: util method to diff task groups 2015-09-07 19:25:23 +00:00			`"reflect"`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00
			`"github.com/hashicorp/nomad/nomad/structs"`
			`)`

scheduler: simplifying 2015-08-14 01:16:32 +00:00			`// allocTuple is a tuple of the allocation name and potential alloc ID`
			`type allocTuple struct {`
			`Name string`
			`TaskGroup *structs.TaskGroup`
			`Alloc *structs.Allocation`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`

			`// materializeTaskGroups is used to materialize all the task groups`
			`// a job requires. This is used to do the count expansion.`
			`func materializeTaskGroups(job structs.Job) map[string]structs.TaskGroup {`
			`out := make(map[string]*structs.TaskGroup)`
Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`if job == nil {`
			`return out`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`

System scheduler and system stack 2015-10-14 23:43:06 +00:00			`for _, tg := range job.TaskGroups {`
Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`for i := 0; i < tg.Count; i++ {`
			`name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)`
System scheduler and system stack 2015-10-14 23:43:06 +00:00			`out[name] = tg`
			`}`
			`}`
			`return out`
			`}`

scheduler: make diff less nasty 2015-08-14 01:28:09 +00:00			`// diffResult is used to return the sets that result from the diff`
			`type diffResult struct {`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`place, update, migrate, stop, ignore []allocTuple`
scheduler: make diff less nasty 2015-08-14 01:28:09 +00:00			`}`

			`func (d *diffResult) GoString() string {`
nomad: adding drain as node property 2015-09-07 02:47:02 +00:00			`return fmt.Sprintf("allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d)",`
scheduler: updating for new APIs 2015-08-26 00:06:06 +00:00			`len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore))`
scheduler: make diff less nasty 2015-08-14 01:28:09 +00:00			`}`

Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`func (d diffResult) Append(other diffResult) {`
			`d.place = append(d.place, other.place...)`
			`d.update = append(d.update, other.update...)`
			`d.migrate = append(d.migrate, other.migrate...)`
			`d.stop = append(d.stop, other.stop...)`
			`d.ignore = append(d.ignore, other.ignore...)`
			`}`

scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`// diffAllocs is used to do a set difference between the target allocations`
scheduler: determine if any allocations need to be migrated 2015-08-13 23:47:39 +00:00			`// and the existing allocations. This returns 5 sets of results, the list of`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`// named task groups that need to be placed (no existing allocation), the`
scheduler: determine if any allocations need to be migrated 2015-08-13 23:47:39 +00:00			`// allocations that need to be updated (job definition is newer), allocs that`
			`// need to be migrated (node is draining), the allocs that need to be evicted`
			`// (no longer required), and those that should be ignored.`
scheduler: make diff less nasty 2015-08-14 01:28:09 +00:00			`func diffAllocs(job *structs.Job, taintedNodes map[string]bool,`
			`required map[string]structs.TaskGroup, allocs []structs.Allocation) *diffResult {`
			`result := &diffResult{}`
scheduler: hide the indexing 2015-08-14 01:18:32 +00:00
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`// Scan the existing updates`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`existing := make(map[string]struct{})`
			`for _, exist := range allocs {`
			`// Index the existing node`
			`name := exist.Name`
			`existing[name] = struct{}{}`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`// Check for the definition in the required set`
			`tg, ok := required[name]`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00
scheduler: updating for new APIs 2015-08-26 00:06:06 +00:00			`// If not required, we stop the alloc`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`if !ok {`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`result.stop = append(result.stop, allocTuple{`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`Name: name,`
			`TaskGroup: tg,`
			`Alloc: exist,`
			`})`
			`continue`
			`}`
scheduler: determine if any allocations need to be migrated 2015-08-13 23:47:39 +00:00
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`// If we are on a tainted node, we must migrate`
			`if taintedNodes[exist.NodeID] {`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`result.migrate = append(result.migrate, allocTuple{`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`Name: name,`
			`TaskGroup: tg,`
			`Alloc: exist,`
			`})`
			`continue`
			`}`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`// If the definition is updated we need to update`
			`// XXX: This is an extremely conservative approach. We can check`
			`// if the job definition has changed in a way that affects`
			`// this allocation and potentially ignore it.`
			`if job.ModifyIndex != exist.Job.ModifyIndex {`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`result.update = append(result.update, allocTuple{`
scheduler: simplifying 2015-08-14 01:16:32 +00:00			`Name: name,`
			`TaskGroup: tg,`
			`Alloc: exist,`
			`})`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`continue`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00
			`// Everything is up-to-date`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`result.ignore = append(result.ignore, allocTuple{`
scheduler: remove explicit index 2015-08-14 01:20:55 +00:00			`Name: name,`
			`TaskGroup: tg,`
			`Alloc: exist,`
			`})`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`

			`// Scan the required groups`
scheduler: simplifying 2015-08-14 01:16:32 +00:00			`for name, tg := range required {`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`// Check for an existing allocation`
			`_, ok := existing[name]`

			`// Require a placement if no existing allocation. If there`
			`// is an existing allocation, we would have checked for a potential`
			`// update or ignore above.`
			`if !ok {`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`result.place = append(result.place, allocTuple{`
scheduler: simplifying 2015-08-14 01:16:32 +00:00			`Name: name,`
			`TaskGroup: tg,`
			`})`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`
			`}`
scheduler: make diff less nasty 2015-08-14 01:28:09 +00:00			`return result`
scheduler: testing utility methods 2015-08-13 23:25:59 +00:00			`}`

Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`// diffSystemAllocs is like diffAllocs however, the allocations in the`
			`// diffResult contain the specific nodeID they should be allocated on.`
			`func diffSystemAllocs(job structs.Job, nodes []structs.Node, taintedNodes map[string]bool,`
			`allocs []structs.Allocation) diffResult {`

			`// Build a mapping of nodes to all their allocs.`
			`nodeAllocs := make(map[string][]*structs.Allocation, len(allocs))`
			`for _, alloc := range allocs {`
			`nallocs := append(nodeAllocs[alloc.NodeID], alloc)`
			`nodeAllocs[alloc.NodeID] = nallocs`
			`}`

			`for _, node := range nodes {`
			`if _, ok := nodeAllocs[node.ID]; !ok {`
			`nodeAllocs[node.ID] = nil`
			`}`
			`}`

			`// Create the required task groups.`
			`required := materializeTaskGroups(job)`

			`result := &diffResult{}`
			`for nodeID, allocs := range nodeAllocs {`
			`diff := diffAllocs(job, taintedNodes, required, allocs)`

			`// Mark the alloc as being for a specific node.`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`for i := range diff.place {`
			`alloc := &diff.place[i]`
Add diffSystemAlloc which gives richer information which node to place a system allocation 2015-10-15 20:14:44 +00:00			`alloc.Alloc = &structs.Allocation{NodeID: nodeID}`
			`}`

			`// Migrate does not apply to system jobs and instead should be marked as`
			`// stop because if a node is tainted, the job is invalid on that node.`
			`diff.stop = append(diff.stop, diff.migrate...)`
			`diff.migrate = nil`

			`result.Append(diff)`
			`}`

			`return result`
			`}`

scheduler: refactor and test 2015-08-14 00:19:09 +00:00			`// readyNodesInDCs returns all the ready nodes in the given datacenters`
			`func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {`
nomad: remove NodesByDatacenterStatus 2015-08-15 20:11:42 +00:00			`// Index the DCs`
			`dcMap := make(map[string]struct{}, len(dcs))`
scheduler: refactor and test 2015-08-14 00:19:09 +00:00			`for _, dc := range dcs {`
nomad: remove NodesByDatacenterStatus 2015-08-15 20:11:42 +00:00			`dcMap[dc] = struct{}{}`
			`}`

			`// Scan the nodes`
			`var out []*structs.Node`
			`iter, err := state.Nodes()`
			`if err != nil {`
			`return nil, err`
			`}`
			`for {`
			`raw := iter.Next()`
			`if raw == nil {`
			`break`
scheduler: refactor and test 2015-08-14 00:19:09 +00:00			`}`
nomad: remove NodesByDatacenterStatus 2015-08-15 20:11:42 +00:00
			`// Filter on datacenter and status`
			`node := raw.(*structs.Node)`
			`if node.Status != structs.NodeStatusReady {`
			`continue`
			`}`
nomad: adding drain as node property 2015-09-07 02:47:02 +00:00			`if node.Drain {`
			`continue`
			`}`
nomad: remove NodesByDatacenterStatus 2015-08-15 20:11:42 +00:00			`if _, ok := dcMap[node.Datacenter]; !ok {`
			`continue`
scheduler: refactor and test 2015-08-14 00:19:09 +00:00			`}`
nomad: remove NodesByDatacenterStatus 2015-08-15 20:11:42 +00:00			`out = append(out, node)`
scheduler: refactor and test 2015-08-14 00:19:09 +00:00			`}`
			`return out, nil`
			`}`
scheduler: trying to simplify further 2015-08-14 00:40:23 +00:00
			`// retryMax is used to retry a callback until it returns success or`
			`// a maximum number of attempts is reached`
			`func retryMax(max int, cb func() (bool, error)) error {`
			`attempts := 0`
			`for attempts < max {`
			`done, err := cb()`
			`if err != nil {`
			`return err`
			`}`
			`if done {`
			`return nil`
			`}`
			`attempts += 1`
			`}`
scheduler: update status and test retry limit 2015-08-15 21:47:13 +00:00			`return &SetStatusError{`
			`Err: fmt.Errorf("maximum attempts reached (%d)", max),`
			`EvalStatus: structs.EvalStatusFailed,`
			`}`
scheduler: trying to simplify further 2015-08-14 00:40:23 +00:00			`}`
schduler: refactor and test 2015-08-14 00:51:31 +00:00
			`// taintedNodes is used to scan the allocations and then check if the`
			`// underlying nodes are tainted, and should force a migration of the allocation.`
			`func taintedNodes(state State, allocs []*structs.Allocation) (map[string]bool, error) {`
			`out := make(map[string]bool)`
			`for _, alloc := range allocs {`
			`if _, ok := out[alloc.NodeID]; ok {`
			`continue`
			`}`

nomad: unifying the state store API 2015-09-07 03:56:38 +00:00			`node, err := state.NodeByID(alloc.NodeID)`
schduler: refactor and test 2015-08-14 00:51:31 +00:00			`if err != nil {`
			`return nil, err`
			`}`

scheduler: trimming more fat 2015-08-14 01:05:31 +00:00			`// If the node does not exist, we should migrate`
			`if node == nil {`
			`out[alloc.NodeID] = true`
			`continue`
			`}`

nomad: adding drain as node property 2015-09-07 02:47:02 +00:00			`out[alloc.NodeID] = structs.ShouldDrainNode(node.Status) \|\| node.Drain`
schduler: refactor and test 2015-08-14 00:51:31 +00:00			`}`
			`return out, nil`
			`}`
scheduler: pull node shuffle into util 2015-09-07 18:23:38 +00:00
			`// shuffleNodes randomizes the slice order with the Fisher-Yates algorithm`
			`func shuffleNodes(nodes []*structs.Node) {`
			`n := len(nodes)`
			`for i := n - 1; i > 0; i-- {`
			`j := rand.Intn(i + 1)`
			`nodes[i], nodes[j] = nodes[j], nodes[i]`
			`}`
			`}`
scheduler: util method to diff task groups 2015-09-07 19:25:23 +00:00
			`// tasksUpdated does a diff between task groups to see if the`
			`// tasks, their drivers or config have updated.`
			`func tasksUpdated(a, b *structs.TaskGroup) bool {`
			`// If the number of tasks do not match, clearly there is an update`
			`if len(a.Tasks) != len(b.Tasks) {`
			`return true`
			`}`

			`// Check each task`
			`for _, at := range a.Tasks {`
			`bt := b.LookupTask(at.Name)`
			`if bt == nil {`
			`return true`
			`}`
			`if at.Driver != bt.Driver {`
			`return true`
			`}`
			`if !reflect.DeepEqual(at.Config, bt.Config) {`
			`return true`
			`}`
schedule: avoid in-place update of task if network resources are different 2015-09-13 23:41:53 +00:00
scheduler: tasks updated should only check if number of dynamic ports is different 2015-10-04 19:53:02 +00:00			`// Inspect the network to see if the dynamic ports are different`
			`if len(at.Resources.Networks) != len(bt.Resources.Networks) {`
schedule: avoid in-place update of task if network resources are different 2015-09-13 23:41:53 +00:00			`return true`
			`}`
scheduler: tasks updated should only check if number of dynamic ports is different 2015-10-04 19:53:02 +00:00			`for idx := range at.Resources.Networks {`
			`an := at.Resources.Networks[idx]`
			`bn := bt.Resources.Networks[idx]`
			`if len(an.DynamicPorts) != len(bn.DynamicPorts) {`
			`return true`
			`}`
			`}`
scheduler: util method to diff task groups 2015-09-07 19:25:23 +00:00			`}`
			`return false`
			`}`
Refactor shared code between schedulers 2015-10-15 00:26:20 +00:00
			`// setStatus is used to update the status of the evaluation`
			`func setStatus(logger log.Logger, planner Planner, eval, nextEval structs.Evaluation, status, desc string) error {`
			`logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)`
			`newEval := eval.Copy()`
			`newEval.Status = status`
			`newEval.StatusDescription = desc`
			`if nextEval != nil {`
			`newEval.NextEval = nextEval.ID`
			`}`
			`return planner.UpdateEval(newEval)`
			`}`

			`// inplaceUpdate attempts to update allocations in-place where possible.`
			`func inplaceUpdate(ctx Context, eval structs.Evaluation, job structs.Job,`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`stack Stack, updates []allocTuple) []allocTuple {`
Refactor shared code between schedulers 2015-10-15 00:26:20 +00:00
			`n := len(updates)`
			`inplace := 0`
			`for i := 0; i < n; i++ {`
			`// Get the update`
			`update := updates[i]`

			`// Check if the task drivers or config has changed, requires`
			`// a rolling upgrade since that cannot be done in-place.`
			`existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)`
			`if tasksUpdated(update.TaskGroup, existing) {`
			`continue`
			`}`

			`// Get the existing node`
			`node, err := ctx.State().NodeByID(update.Alloc.NodeID)`
			`if err != nil {`
			`ctx.Logger().Printf("[ERR] sched: %#v failed to get node '%s': %v",`
			`eval, update.Alloc.NodeID, err)`
			`continue`
			`}`
			`if node == nil {`
			`continue`
			`}`

			`// Set the existing node as the base set`
			`stack.SetNodes([]*structs.Node{node})`

			`// Stage an eviction of the current allocation`
			`ctx.Plan().AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop,`
			`allocInPlace)`

			`// Attempt to match the task group`
			`option, size := stack.Select(update.TaskGroup)`

			`// Pop the allocation`
			`ctx.Plan().PopUpdate(update.Alloc)`

			`// Skip if we could not do an in-place update`
			`if option == nil {`
			`continue`
			`}`

			`// Restore the network offers from the existing allocation.`
			`// We do not allow network resources (reserved/dynamic ports)`
			`// to be updated. This is guarded in taskUpdated, so we can`
			`// safely restore those here.`
			`for task, resources := range option.TaskResources {`
			`existing := update.Alloc.TaskResources[task]`
			`resources.Networks = existing.Networks`
			`}`

			`// Create a shallow copy`
			`newAlloc := new(structs.Allocation)`
			`newAlloc = update.Alloc`

			`// Update the allocation`
			`newAlloc.EvalID = eval.ID`
			`newAlloc.Job = job`
			`newAlloc.Resources = size`
			`newAlloc.TaskResources = option.TaskResources`
			`newAlloc.Metrics = ctx.Metrics()`
			`newAlloc.DesiredStatus = structs.AllocDesiredStatusRun`
			`newAlloc.ClientStatus = structs.AllocClientStatusPending`
			`ctx.Plan().AppendAlloc(newAlloc)`

			`// Remove this allocation from the slice`
			`updates[i] = updates[n-1]`
			`i--`
			`n--`
			`inplace++`
			`}`
			`if len(updates) > 0 {`
			`ctx.Logger().Printf("[DEBUG] sched: %#v: %d in-place updates of %d", eval, inplace, len(updates))`
			`}`
			`return updates[:n]`
			`}`

			`// evictAndPlace is used to mark allocations for evicts and add them to the`
			`// placement queue. evictAndPlace modifies both the the diffResult and the`
			`// limit. It returns true if the limit has been reached.`
diffResult stores values not pointers 2015-10-16 18:43:09 +00:00			`func evictAndPlace(ctx Context, diff diffResult, allocs []allocTuple, desc string, limit int) bool {`
Refactor shared code between schedulers 2015-10-15 00:26:20 +00:00			`n := len(allocs)`
			`for i := 0; i < n && i < *limit; i++ {`
			`a := allocs[i]`
			`ctx.Plan().AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)`
			`diff.place = append(diff.place, a)`
			`}`
			`if n <= *limit {`
			`*limit -= n`
			`return false`
			`}`
			`*limit = 0`
			`return true`
			`}`
Refactor task group constraint logic in generic/system stack 2015-10-16 21:00:51 +00:00
			`// tgConstrainTuple is used to store the total constraints of a task group.`
			`type tgConstrainTuple struct {`
			`// Holds the combined constraints of the task group and all it's sub-tasks.`
			`constraints []*structs.Constraint`

			`// The set of required drivers within the task group.`
			`drivers map[string]struct{}`

			`// The combined resources of all tasks within the task group.`
			`size *structs.Resources`
			`}`

			`// taskGroupConstraints collects the constraints, drivers and resources required by each`
			`// sub-task to aggregate the TaskGroup totals`
			`func taskGroupConstraints(tg *structs.TaskGroup) tgConstrainTuple {`
			`c := tgConstrainTuple{`
			`constraints: make([]*structs.Constraint, 0, len(tg.Constraints)),`
			`drivers: make(map[string]struct{}),`
			`size: new(structs.Resources),`
			`}`

			`c.constraints = append(c.constraints, tg.Constraints...)`
			`for _, task := range tg.Tasks {`
			`c.drivers[task.Driver] = struct{}{}`
			`c.constraints = append(c.constraints, task.Constraints...)`
			`c.size.Add(task.Resources)`
			`}`

			`return c`
			`}`