2023-04-10 15:36:59 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2018-10-17 21:26:25 +00:00
|
|
|
package structs
|
|
|
|
|
|
|
|
// DeviceAccounter is used to account for device usage on a node. It can detect
|
|
|
|
// when a node is oversubscribed and can be used for deciding what devices are
|
|
|
|
// free
|
|
|
|
type DeviceAccounter struct {
|
2018-10-31 20:57:43 +00:00
|
|
|
// Devices maps a device group to its device accounter instance
|
2018-10-17 21:26:25 +00:00
|
|
|
Devices map[DeviceIdTuple]*DeviceAccounterInstance
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeviceAccounterInstance wraps a device and adds tracking to the instances of
|
|
|
|
// the device to determine if they are free or not.
|
|
|
|
type DeviceAccounterInstance struct {
|
|
|
|
// Device is the device being wrapped
|
|
|
|
Device *NodeDeviceResource
|
|
|
|
|
2018-10-31 20:57:43 +00:00
|
|
|
// Instances is a mapping of the device IDs to their usage.
|
2018-10-17 21:26:25 +00:00
|
|
|
// Only a value of 0 indicates that the instance is unused.
|
|
|
|
Instances map[string]int
|
|
|
|
}
|
|
|
|
|
2018-10-17 22:03:38 +00:00
|
|
|
// NewDeviceAccounter returns a new device accounter. The node is used to
|
|
|
|
// populate the set of available devices based on what healthy device instances
|
|
|
|
// exist on the node.
|
|
|
|
func NewDeviceAccounter(n *Node) *DeviceAccounter {
|
|
|
|
numDevices := 0
|
|
|
|
var devices []*NodeDeviceResource
|
|
|
|
|
|
|
|
// COMPAT(0.11): Remove in 0.11
|
|
|
|
if n.NodeResources != nil {
|
|
|
|
numDevices = len(n.NodeResources.Devices)
|
|
|
|
devices = n.NodeResources.Devices
|
|
|
|
}
|
|
|
|
|
|
|
|
d := &DeviceAccounter{
|
|
|
|
Devices: make(map[DeviceIdTuple]*DeviceAccounterInstance, numDevices),
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, dev := range devices {
|
|
|
|
id := *dev.ID()
|
|
|
|
d.Devices[id] = &DeviceAccounterInstance{
|
|
|
|
Device: dev,
|
|
|
|
Instances: make(map[string]int, len(dev.Instances)),
|
|
|
|
}
|
|
|
|
for _, instance := range dev.Instances {
|
|
|
|
// Skip unhealthy devices as they aren't allocatable
|
|
|
|
if !instance.Healthy {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
d.Devices[id].Instances[instance.ID] = 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
2018-10-17 21:26:25 +00:00
|
|
|
// AddAllocs takes a set of allocations and internally marks which devices are
|
|
|
|
// used. If a device is used more than once by the set of passed allocations,
|
|
|
|
// the collision will be returned as true.
|
|
|
|
func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) {
|
|
|
|
for _, a := range allocs {
|
|
|
|
// Filter any terminal allocation
|
2022-09-13 19:52:47 +00:00
|
|
|
if a.ClientTerminalStatus() {
|
2018-10-17 21:26:25 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// COMPAT(0.11): Remove in 0.11
|
|
|
|
// If the alloc doesn't have the new style resources, it can't have
|
|
|
|
// devices
|
|
|
|
if a.AllocatedResources == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Go through each task resource
|
|
|
|
for _, tr := range a.AllocatedResources.Tasks {
|
|
|
|
|
|
|
|
// Go through each assigned device group
|
|
|
|
for _, device := range tr.Devices {
|
|
|
|
devID := device.ID()
|
|
|
|
|
|
|
|
// Go through each assigned device
|
|
|
|
for _, instanceID := range device.DeviceIDs {
|
|
|
|
|
|
|
|
// Mark that we are using the device. It may not be in the
|
|
|
|
// map if the device is no longer being fingerprinted, is
|
|
|
|
// unhealthy, etc.
|
|
|
|
if devInst, ok := d.Devices[*devID]; ok {
|
|
|
|
if i, ok := devInst.Instances[instanceID]; ok {
|
|
|
|
// Mark that the device is in use
|
|
|
|
devInst.Instances[instanceID]++
|
|
|
|
|
|
|
|
if i != 0 {
|
|
|
|
collision = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddReserved marks the device instances in the passed device reservation as
|
|
|
|
// used and returns if there is a collision.
|
|
|
|
func (d *DeviceAccounter) AddReserved(res *AllocatedDeviceResource) (collision bool) {
|
|
|
|
// Lookup the device.
|
|
|
|
devInst, ok := d.Devices[*res.ID()]
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// For each reserved instance, mark it as used
|
|
|
|
for _, id := range res.DeviceIDs {
|
|
|
|
cur, ok := devInst.Instances[id]
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// It has already been used, so mark that there is a collision
|
|
|
|
if cur != 0 {
|
|
|
|
collision = true
|
|
|
|
}
|
|
|
|
|
|
|
|
devInst.Instances[id]++
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
2018-11-15 03:23:38 +00:00
|
|
|
|
|
|
|
// FreeCount returns the number of free device instances
|
|
|
|
func (i *DeviceAccounterInstance) FreeCount() int {
|
|
|
|
count := 0
|
|
|
|
for _, c := range i.Instances {
|
|
|
|
if c == 0 {
|
|
|
|
count++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return count
|
|
|
|
}
|