2015-08-13 23:25:59 +00:00
|
|
|
package scheduler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2015-09-07 18:23:38 +00:00
|
|
|
"math/rand"
|
2015-09-07 19:25:23 +00:00
|
|
|
"reflect"
|
2015-08-13 23:25:59 +00:00
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
)
|
|
|
|
|
2015-08-14 01:16:32 +00:00
|
|
|
// allocTuple is a tuple of the allocation name and potential alloc ID
|
|
|
|
type allocTuple struct {
|
|
|
|
Name string
|
|
|
|
TaskGroup *structs.TaskGroup
|
|
|
|
Alloc *structs.Allocation
|
2015-08-13 23:25:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// materializeTaskGroups is used to materialize all the task groups
|
|
|
|
// a job requires. This is used to do the count expansion.
|
|
|
|
func materializeTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
|
|
|
|
out := make(map[string]*structs.TaskGroup)
|
|
|
|
for _, tg := range job.TaskGroups {
|
|
|
|
for i := 0; i < tg.Count; i++ {
|
|
|
|
name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)
|
|
|
|
out[name] = tg
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
2015-08-14 01:28:09 +00:00
|
|
|
// diffResult is used to return the sets that result from the diff
|
|
|
|
type diffResult struct {
|
2015-08-26 00:06:06 +00:00
|
|
|
place, update, migrate, stop, ignore []allocTuple
|
2015-08-14 01:28:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (d *diffResult) GoString() string {
|
2015-09-07 02:47:02 +00:00
|
|
|
return fmt.Sprintf("allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d)",
|
2015-08-26 00:06:06 +00:00
|
|
|
len(d.place), len(d.update), len(d.migrate), len(d.stop), len(d.ignore))
|
2015-08-14 01:28:09 +00:00
|
|
|
}
|
|
|
|
|
2015-08-13 23:25:59 +00:00
|
|
|
// diffAllocs is used to do a set difference between the target allocations
|
2015-08-13 23:47:39 +00:00
|
|
|
// and the existing allocations. This returns 5 sets of results, the list of
|
2015-08-13 23:25:59 +00:00
|
|
|
// named task groups that need to be placed (no existing allocation), the
|
2015-08-13 23:47:39 +00:00
|
|
|
// allocations that need to be updated (job definition is newer), allocs that
|
|
|
|
// need to be migrated (node is draining), the allocs that need to be evicted
|
|
|
|
// (no longer required), and those that should be ignored.
|
2015-08-14 01:28:09 +00:00
|
|
|
func diffAllocs(job *structs.Job, taintedNodes map[string]bool,
|
|
|
|
required map[string]*structs.TaskGroup, allocs []*structs.Allocation) *diffResult {
|
|
|
|
result := &diffResult{}
|
2015-08-14 01:18:32 +00:00
|
|
|
|
2015-08-13 23:25:59 +00:00
|
|
|
// Scan the existing updates
|
2015-08-14 01:20:55 +00:00
|
|
|
existing := make(map[string]struct{})
|
|
|
|
for _, exist := range allocs {
|
|
|
|
// Index the existing node
|
|
|
|
name := exist.Name
|
|
|
|
existing[name] = struct{}{}
|
2015-08-13 23:25:59 +00:00
|
|
|
|
2015-08-14 01:20:55 +00:00
|
|
|
// Check for the definition in the required set
|
|
|
|
tg, ok := required[name]
|
2015-08-13 23:25:59 +00:00
|
|
|
|
2015-08-26 00:06:06 +00:00
|
|
|
// If not required, we stop the alloc
|
2015-08-14 01:20:55 +00:00
|
|
|
if !ok {
|
2015-08-26 00:06:06 +00:00
|
|
|
result.stop = append(result.stop, allocTuple{
|
2015-08-14 01:20:55 +00:00
|
|
|
Name: name,
|
|
|
|
TaskGroup: tg,
|
|
|
|
Alloc: exist,
|
|
|
|
})
|
|
|
|
continue
|
|
|
|
}
|
2015-08-13 23:47:39 +00:00
|
|
|
|
2015-08-14 01:20:55 +00:00
|
|
|
// If we are on a tainted node, we must migrate
|
|
|
|
if taintedNodes[exist.NodeID] {
|
2015-08-14 01:28:09 +00:00
|
|
|
result.migrate = append(result.migrate, allocTuple{
|
2015-08-14 01:20:55 +00:00
|
|
|
Name: name,
|
|
|
|
TaskGroup: tg,
|
|
|
|
Alloc: exist,
|
|
|
|
})
|
|
|
|
continue
|
|
|
|
}
|
2015-08-13 23:25:59 +00:00
|
|
|
|
2015-08-14 01:20:55 +00:00
|
|
|
// If the definition is updated we need to update
|
|
|
|
// XXX: This is an extremely conservative approach. We can check
|
|
|
|
// if the job definition has changed in a way that affects
|
|
|
|
// this allocation and potentially ignore it.
|
|
|
|
if job.ModifyIndex != exist.Job.ModifyIndex {
|
2015-08-14 01:28:09 +00:00
|
|
|
result.update = append(result.update, allocTuple{
|
2015-08-14 01:16:32 +00:00
|
|
|
Name: name,
|
|
|
|
TaskGroup: tg,
|
|
|
|
Alloc: exist,
|
|
|
|
})
|
2015-08-14 01:20:55 +00:00
|
|
|
continue
|
2015-08-13 23:25:59 +00:00
|
|
|
}
|
2015-08-14 01:20:55 +00:00
|
|
|
|
|
|
|
// Everything is up-to-date
|
2015-08-14 01:28:09 +00:00
|
|
|
result.ignore = append(result.ignore, allocTuple{
|
2015-08-14 01:20:55 +00:00
|
|
|
Name: name,
|
|
|
|
TaskGroup: tg,
|
|
|
|
Alloc: exist,
|
|
|
|
})
|
2015-08-13 23:25:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Scan the required groups
|
2015-08-14 01:16:32 +00:00
|
|
|
for name, tg := range required {
|
2015-08-13 23:25:59 +00:00
|
|
|
// Check for an existing allocation
|
|
|
|
_, ok := existing[name]
|
|
|
|
|
|
|
|
// Require a placement if no existing allocation. If there
|
|
|
|
// is an existing allocation, we would have checked for a potential
|
|
|
|
// update or ignore above.
|
|
|
|
if !ok {
|
2015-08-14 01:28:09 +00:00
|
|
|
result.place = append(result.place, allocTuple{
|
2015-08-14 01:16:32 +00:00
|
|
|
Name: name,
|
|
|
|
TaskGroup: tg,
|
|
|
|
})
|
2015-08-13 23:25:59 +00:00
|
|
|
}
|
|
|
|
}
|
2015-08-14 01:28:09 +00:00
|
|
|
return result
|
2015-08-13 23:25:59 +00:00
|
|
|
}
|
|
|
|
|
2015-08-14 00:19:09 +00:00
|
|
|
// readyNodesInDCs returns all the ready nodes in the given datacenters
|
|
|
|
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
|
2015-08-15 20:11:42 +00:00
|
|
|
// Index the DCs
|
|
|
|
dcMap := make(map[string]struct{}, len(dcs))
|
2015-08-14 00:19:09 +00:00
|
|
|
for _, dc := range dcs {
|
2015-08-15 20:11:42 +00:00
|
|
|
dcMap[dc] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Scan the nodes
|
|
|
|
var out []*structs.Node
|
|
|
|
iter, err := state.Nodes()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
raw := iter.Next()
|
|
|
|
if raw == nil {
|
|
|
|
break
|
2015-08-14 00:19:09 +00:00
|
|
|
}
|
2015-08-15 20:11:42 +00:00
|
|
|
|
|
|
|
// Filter on datacenter and status
|
|
|
|
node := raw.(*structs.Node)
|
|
|
|
if node.Status != structs.NodeStatusReady {
|
|
|
|
continue
|
|
|
|
}
|
2015-09-07 02:47:02 +00:00
|
|
|
if node.Drain {
|
|
|
|
continue
|
|
|
|
}
|
2015-08-15 20:11:42 +00:00
|
|
|
if _, ok := dcMap[node.Datacenter]; !ok {
|
|
|
|
continue
|
2015-08-14 00:19:09 +00:00
|
|
|
}
|
2015-08-15 20:11:42 +00:00
|
|
|
out = append(out, node)
|
2015-08-14 00:19:09 +00:00
|
|
|
}
|
|
|
|
return out, nil
|
|
|
|
}
|
2015-08-14 00:40:23 +00:00
|
|
|
|
|
|
|
// retryMax is used to retry a callback until it returns success or
|
|
|
|
// a maximum number of attempts is reached
|
|
|
|
func retryMax(max int, cb func() (bool, error)) error {
|
|
|
|
attempts := 0
|
|
|
|
for attempts < max {
|
|
|
|
done, err := cb()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if done {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
attempts += 1
|
|
|
|
}
|
2015-08-15 21:47:13 +00:00
|
|
|
return &SetStatusError{
|
|
|
|
Err: fmt.Errorf("maximum attempts reached (%d)", max),
|
|
|
|
EvalStatus: structs.EvalStatusFailed,
|
|
|
|
}
|
2015-08-14 00:40:23 +00:00
|
|
|
}
|
2015-08-14 00:51:31 +00:00
|
|
|
|
|
|
|
// taintedNodes is used to scan the allocations and then check if the
|
|
|
|
// underlying nodes are tainted, and should force a migration of the allocation.
|
|
|
|
func taintedNodes(state State, allocs []*structs.Allocation) (map[string]bool, error) {
|
|
|
|
out := make(map[string]bool)
|
|
|
|
for _, alloc := range allocs {
|
|
|
|
if _, ok := out[alloc.NodeID]; ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-09-07 03:56:38 +00:00
|
|
|
node, err := state.NodeByID(alloc.NodeID)
|
2015-08-14 00:51:31 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-08-14 01:05:31 +00:00
|
|
|
// If the node does not exist, we should migrate
|
|
|
|
if node == nil {
|
|
|
|
out[alloc.NodeID] = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-09-07 02:47:02 +00:00
|
|
|
out[alloc.NodeID] = structs.ShouldDrainNode(node.Status) || node.Drain
|
2015-08-14 00:51:31 +00:00
|
|
|
}
|
|
|
|
return out, nil
|
|
|
|
}
|
2015-09-07 18:23:38 +00:00
|
|
|
|
|
|
|
// shuffleNodes randomizes the slice order with the Fisher-Yates algorithm
|
|
|
|
func shuffleNodes(nodes []*structs.Node) {
|
|
|
|
n := len(nodes)
|
|
|
|
for i := n - 1; i > 0; i-- {
|
|
|
|
j := rand.Intn(i + 1)
|
|
|
|
nodes[i], nodes[j] = nodes[j], nodes[i]
|
|
|
|
}
|
|
|
|
}
|
2015-09-07 19:25:23 +00:00
|
|
|
|
|
|
|
// tasksUpdated does a diff between task groups to see if the
|
|
|
|
// tasks, their drivers or config have updated.
|
|
|
|
func tasksUpdated(a, b *structs.TaskGroup) bool {
|
|
|
|
// If the number of tasks do not match, clearly there is an update
|
|
|
|
if len(a.Tasks) != len(b.Tasks) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check each task
|
|
|
|
for _, at := range a.Tasks {
|
|
|
|
bt := b.LookupTask(at.Name)
|
|
|
|
if bt == nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if at.Driver != bt.Driver {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(at.Config, bt.Config) {
|
|
|
|
return true
|
|
|
|
}
|
2015-09-13 23:41:53 +00:00
|
|
|
|
2015-10-04 19:53:02 +00:00
|
|
|
// Inspect the network to see if the dynamic ports are different
|
|
|
|
if len(at.Resources.Networks) != len(bt.Resources.Networks) {
|
2015-09-13 23:41:53 +00:00
|
|
|
return true
|
|
|
|
}
|
2015-10-04 19:53:02 +00:00
|
|
|
for idx := range at.Resources.Networks {
|
|
|
|
an := at.Resources.Networks[idx]
|
|
|
|
bn := bt.Resources.Networks[idx]
|
|
|
|
if len(an.DynamicPorts) != len(bn.DynamicPorts) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
2015-09-07 19:25:23 +00:00
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|