2018-10-15 22:15:46 +00:00
|
|
|
package scheduler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
)
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// deviceAllocator is used to allocate devices to allocations. The allocator
|
|
|
|
// tracks availability as to not double allocate devices.
|
2018-10-15 22:15:46 +00:00
|
|
|
type deviceAllocator struct {
|
|
|
|
ctx Context
|
|
|
|
devices map[structs.DeviceIdTuple]*deviceAllocatorInstance
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// deviceAllocatorInstance wraps a device and adds tracking to the instances of
|
|
|
|
// the device to determine if they are free or not.
|
2018-10-15 22:15:46 +00:00
|
|
|
type deviceAllocatorInstance struct {
|
2018-10-17 18:04:54 +00:00
|
|
|
// d is the device being wrapped
|
|
|
|
d *structs.NodeDeviceResource
|
2018-10-15 22:15:46 +00:00
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// instances is a mapping of the device IDs of the instances to their usage.
|
|
|
|
// Only a value of 0 indicates that the instance is unused.
|
|
|
|
instances map[string]int
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// newDeviceAllocator returns a new device allocator. The node is used to
|
|
|
|
// populate the set of available devices based on what healthy device instances
|
|
|
|
// exist on the node.
|
2018-10-15 22:15:46 +00:00
|
|
|
func newDeviceAllocator(ctx Context, n *structs.Node) *deviceAllocator {
|
|
|
|
numDevices := 0
|
|
|
|
var devices []*structs.NodeDeviceResource
|
|
|
|
|
|
|
|
// COMPAT(0.11): Remove in 0.11
|
|
|
|
if n.NodeResources != nil {
|
|
|
|
numDevices = len(n.NodeResources.Devices)
|
|
|
|
devices = n.NodeResources.Devices
|
|
|
|
}
|
|
|
|
|
|
|
|
d := &deviceAllocator{
|
|
|
|
ctx: ctx,
|
|
|
|
devices: make(map[structs.DeviceIdTuple]*deviceAllocatorInstance, numDevices),
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, dev := range devices {
|
|
|
|
id := *dev.ID()
|
|
|
|
d.devices[id] = &deviceAllocatorInstance{
|
|
|
|
d: dev,
|
|
|
|
instances: make(map[string]int, len(dev.Instances)),
|
|
|
|
}
|
|
|
|
for _, instance := range dev.Instances {
|
|
|
|
// Skip unhealthy devices as they aren't allocatable
|
|
|
|
if !instance.Healthy {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
d.devices[id].instances[instance.ID] = 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddAllocs takes a set of allocations and internally marks which devices are
|
2018-10-17 18:04:54 +00:00
|
|
|
// used. If a device is used more than once by the set of passed allocations,
|
|
|
|
// the collision will be returned as true.
|
2018-10-15 22:15:46 +00:00
|
|
|
func (d *deviceAllocator) AddAllocs(allocs []*structs.Allocation) (collision bool) {
|
|
|
|
for _, a := range allocs {
|
|
|
|
// Filter any terminal allocation
|
|
|
|
if a.TerminalStatus() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// COMPAT(0.11): Remove in 0.11
|
|
|
|
// If the alloc doesn't have the new style resources, it can't have
|
|
|
|
// devices
|
|
|
|
if a.AllocatedResources == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Go through each task resource
|
|
|
|
for _, tr := range a.AllocatedResources.Tasks {
|
|
|
|
|
|
|
|
// Go through each assigned device group
|
|
|
|
for _, device := range tr.Devices {
|
|
|
|
devID := device.ID()
|
|
|
|
|
|
|
|
// Go through each assigned device
|
|
|
|
for _, instanceID := range device.DeviceIDs {
|
|
|
|
|
|
|
|
// Mark that we are using the device. It may not be in the
|
|
|
|
// map if the device is no longer being fingerprinted, is
|
|
|
|
// unhealthy, etc.
|
|
|
|
if devInst, ok := d.devices[*devID]; ok {
|
|
|
|
if i, ok := devInst.instances[instanceID]; ok {
|
|
|
|
// Mark that the device is in use
|
|
|
|
devInst.instances[instanceID]++
|
|
|
|
|
|
|
|
if i != 0 {
|
|
|
|
collision = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// AddReserved marks the device instances in the passed device reservation as
|
|
|
|
// used and returns if there is a collision.
|
2018-10-15 22:15:46 +00:00
|
|
|
func (d *deviceAllocator) AddReserved(res *structs.AllocatedDeviceResource) (collision bool) {
|
2018-10-17 18:04:54 +00:00
|
|
|
// Lookup the device.
|
2018-10-15 22:15:46 +00:00
|
|
|
devInst, ok := d.devices[*res.ID()]
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// For each reserved instance, mark it as used
|
2018-10-15 22:15:46 +00:00
|
|
|
for _, id := range res.DeviceIDs {
|
|
|
|
cur, ok := devInst.instances[id]
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// It has already been used, so mark that there is a collision
|
2018-10-15 22:15:46 +00:00
|
|
|
if cur != 0 {
|
|
|
|
collision = true
|
|
|
|
}
|
|
|
|
|
|
|
|
devInst.instances[id]++
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// AssignDevice takes a device request and returns an assignment as well as a
|
|
|
|
// score for the assignment. If no assignment could be made, an error is
|
|
|
|
// returned explaining why.
|
|
|
|
func (d *deviceAllocator) AssignDevice(ask *structs.RequestedDevice) (out *structs.AllocatedDeviceResource, score float64, err error) {
|
2018-10-15 22:15:46 +00:00
|
|
|
// Try to hot path
|
|
|
|
if len(d.devices) == 0 {
|
2018-10-17 18:04:54 +00:00
|
|
|
return nil, 0.0, fmt.Errorf("no devices available")
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|
|
|
|
if ask.Count == 0 {
|
2018-10-17 18:04:54 +00:00
|
|
|
return nil, 0.0, fmt.Errorf("invalid request of zero devices")
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Hold the current best offer
|
|
|
|
var offer *structs.AllocatedDeviceResource
|
2018-10-17 18:04:54 +00:00
|
|
|
var offerScore float64
|
2018-10-15 22:15:46 +00:00
|
|
|
|
|
|
|
// Determine the devices that are feasible based on availability and
|
|
|
|
// constraints
|
|
|
|
for id, devInst := range d.devices {
|
|
|
|
// Check if we have enough unused instances to use this
|
|
|
|
assignable := uint64(0)
|
|
|
|
for _, v := range devInst.instances {
|
|
|
|
if v == 0 {
|
|
|
|
assignable++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// This device doesn't have enough instances
|
|
|
|
if assignable < ask.Count {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the device works
|
|
|
|
if !nodeDeviceMatches(d.ctx, devInst.d, ask) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Score the choice
|
|
|
|
var choiceScore float64
|
2018-10-17 18:04:54 +00:00
|
|
|
if l := len(ask.Affinities); l != 0 {
|
|
|
|
for _, a := range ask.Affinities {
|
|
|
|
// Resolve the targets
|
|
|
|
lVal, ok := resolveDeviceTarget(a.LTarget, devInst.d)
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
rVal, ok := resolveDeviceTarget(a.RTarget, devInst.d)
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if satisfied
|
|
|
|
if !checkAttributeAffinity(d.ctx, a.Operand, lVal, rVal) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
choiceScore += a.Weight
|
|
|
|
}
|
|
|
|
|
|
|
|
// normalize
|
|
|
|
choiceScore /= float64(l)
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// Only use the device if it is a higher score than we have already seen
|
|
|
|
if offer != nil && choiceScore < offerScore {
|
2018-10-15 22:15:46 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the new highest score
|
2018-10-17 18:04:54 +00:00
|
|
|
offerScore = choiceScore
|
2018-10-15 22:15:46 +00:00
|
|
|
|
|
|
|
// Build the choice
|
|
|
|
offer = &structs.AllocatedDeviceResource{
|
|
|
|
Vendor: id.Vendor,
|
|
|
|
Type: id.Type,
|
|
|
|
Name: id.Name,
|
|
|
|
DeviceIDs: make([]string, 0, ask.Count),
|
|
|
|
}
|
|
|
|
|
|
|
|
assigned := uint64(0)
|
|
|
|
for id, v := range devInst.instances {
|
|
|
|
if v == 0 && assigned < ask.Count {
|
|
|
|
assigned++
|
|
|
|
offer.DeviceIDs = append(offer.DeviceIDs, id)
|
|
|
|
if assigned == ask.Count {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
// Failed to find a match
|
2018-10-15 22:15:46 +00:00
|
|
|
if offer == nil {
|
2018-10-17 18:04:54 +00:00
|
|
|
return nil, 0.0, fmt.Errorf("no devices match request")
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|
|
|
|
|
2018-10-17 18:04:54 +00:00
|
|
|
return offer, offerScore, nil
|
2018-10-15 22:15:46 +00:00
|
|
|
}
|