open-nomad/scheduler/util.go

195 lines
4.9 KiB
Go
Raw Normal View History

2015-08-13 23:25:59 +00:00
package scheduler
import (
"fmt"
"github.com/hashicorp/nomad/nomad/structs"
)
2015-08-14 01:16:32 +00:00
// allocTuple is a tuple of the allocation name and potential alloc ID
type allocTuple struct {
Name string
TaskGroup *structs.TaskGroup
Alloc *structs.Allocation
2015-08-13 23:25:59 +00:00
}
// materializeTaskGroups is used to materialize all the task groups
// a job requires. This is used to do the count expansion.
func materializeTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
out := make(map[string]*structs.TaskGroup)
for _, tg := range job.TaskGroups {
for i := 0; i < tg.Count; i++ {
name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)
out[name] = tg
}
}
return out
}
2015-08-14 01:28:09 +00:00
// diffResult is used to return the sets that result from the diff
type diffResult struct {
2015-08-26 00:06:06 +00:00
place, update, migrate, stop, ignore []allocTuple
2015-08-14 01:28:09 +00:00
}
func (d *diffResult) GoString() string {
2015-08-26 00:06:06 +00:00
return fmt.Sprintf("allocs: (place %d) (update %d) (stop %d) (evict %d) (ignore %d)",
len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore))
2015-08-14 01:28:09 +00:00
}
2015-08-13 23:25:59 +00:00
// diffAllocs is used to do a set difference between the target allocations
// and the existing allocations. This returns 5 sets of results, the list of
2015-08-13 23:25:59 +00:00
// named task groups that need to be placed (no existing allocation), the
// allocations that need to be updated (job definition is newer), allocs that
// need to be migrated (node is draining), the allocs that need to be evicted
// (no longer required), and those that should be ignored.
2015-08-14 01:28:09 +00:00
func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
required map[string]*structs.TaskGroup, allocs []*structs.Allocation) *diffResult {
result := &diffResult{}
2015-08-14 01:18:32 +00:00
2015-08-13 23:25:59 +00:00
// Scan the existing updates
2015-08-14 01:20:55 +00:00
existing := make(map[string]struct{})
for _, exist := range allocs {
// Index the existing node
name := exist.Name
existing[name] = struct{}{}
2015-08-13 23:25:59 +00:00
2015-08-14 01:20:55 +00:00
// Check for the definition in the required set
tg, ok := required[name]
2015-08-13 23:25:59 +00:00
2015-08-26 00:06:06 +00:00
// If not required, we stop the alloc
2015-08-14 01:20:55 +00:00
if !ok {
2015-08-26 00:06:06 +00:00
result.stop = append(result.stop, allocTuple{
2015-08-14 01:20:55 +00:00
Name: name,
TaskGroup: tg,
Alloc: exist,
})
continue
}
2015-08-14 01:20:55 +00:00
// If we are on a tainted node, we must migrate
if taintedNodes[exist.NodeID] {
2015-08-14 01:28:09 +00:00
result.migrate = append(result.migrate, allocTuple{
2015-08-14 01:20:55 +00:00
Name: name,
TaskGroup: tg,
Alloc: exist,
})
continue
}
2015-08-13 23:25:59 +00:00
2015-08-14 01:20:55 +00:00
// If the definition is updated we need to update
// XXX: This is an extremely conservative approach. We can check
// if the job definition has changed in a way that affects
// this allocation and potentially ignore it.
if job.ModifyIndex != exist.Job.ModifyIndex {
2015-08-14 01:28:09 +00:00
result.update = append(result.update, allocTuple{
2015-08-14 01:16:32 +00:00
Name: name,
TaskGroup: tg,
Alloc: exist,
})
2015-08-14 01:20:55 +00:00
continue
2015-08-13 23:25:59 +00:00
}
2015-08-14 01:20:55 +00:00
// Everything is up-to-date
2015-08-14 01:28:09 +00:00
result.ignore = append(result.ignore, allocTuple{
2015-08-14 01:20:55 +00:00
Name: name,
TaskGroup: tg,
Alloc: exist,
})
2015-08-13 23:25:59 +00:00
}
// Scan the required groups
2015-08-14 01:16:32 +00:00
for name, tg := range required {
2015-08-13 23:25:59 +00:00
// Check for an existing allocation
_, ok := existing[name]
// Require a placement if no existing allocation. If there
// is an existing allocation, we would have checked for a potential
// update or ignore above.
if !ok {
2015-08-14 01:28:09 +00:00
result.place = append(result.place, allocTuple{
2015-08-14 01:16:32 +00:00
Name: name,
TaskGroup: tg,
})
2015-08-13 23:25:59 +00:00
}
}
2015-08-14 01:28:09 +00:00
return result
2015-08-13 23:25:59 +00:00
}
2015-08-14 00:19:09 +00:00
// readyNodesInDCs returns all the ready nodes in the given datacenters
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
2015-08-15 20:11:42 +00:00
// Index the DCs
dcMap := make(map[string]struct{}, len(dcs))
2015-08-14 00:19:09 +00:00
for _, dc := range dcs {
2015-08-15 20:11:42 +00:00
dcMap[dc] = struct{}{}
}
// Scan the nodes
var out []*structs.Node
iter, err := state.Nodes()
if err != nil {
return nil, err
}
for {
raw := iter.Next()
if raw == nil {
break
2015-08-14 00:19:09 +00:00
}
2015-08-15 20:11:42 +00:00
// Filter on datacenter and status
node := raw.(*structs.Node)
if node.Status != structs.NodeStatusReady {
continue
}
if _, ok := dcMap[node.Datacenter]; !ok {
continue
2015-08-14 00:19:09 +00:00
}
2015-08-15 20:11:42 +00:00
out = append(out, node)
2015-08-14 00:19:09 +00:00
}
return out, nil
}
2015-08-14 00:40:23 +00:00
// retryMax is used to retry a callback until it returns success or
// a maximum number of attempts is reached
func retryMax(max int, cb func() (bool, error)) error {
attempts := 0
for attempts < max {
done, err := cb()
if err != nil {
return err
}
if done {
return nil
}
attempts += 1
}
return &SetStatusError{
Err: fmt.Errorf("maximum attempts reached (%d)", max),
EvalStatus: structs.EvalStatusFailed,
}
2015-08-14 00:40:23 +00:00
}
2015-08-14 00:51:31 +00:00
// taintedNodes is used to scan the allocations and then check if the
// underlying nodes are tainted, and should force a migration of the allocation.
func taintedNodes(state State, allocs []*structs.Allocation) (map[string]bool, error) {
out := make(map[string]bool)
for _, alloc := range allocs {
if _, ok := out[alloc.NodeID]; ok {
continue
}
node, err := state.GetNodeByID(alloc.NodeID)
if err != nil {
return nil, err
}
2015-08-14 01:05:31 +00:00
// If the node does not exist, we should migrate
if node == nil {
out[alloc.NodeID] = true
continue
}
2015-08-14 00:51:31 +00:00
out[alloc.NodeID] = structs.ShouldDrainNode(node.Status)
}
return out, nil
}