diff --git a/.changelog/15455.txt b/.changelog/15455.txt new file mode 100644 index 000000000..1267340f6 --- /dev/null +++ b/.changelog/15455.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: allow using device IDs in `affinity` and `constraint` +``` diff --git a/scheduler/feasible.go b/scheduler/feasible.go index ade5ce3c8..eb8a1045d 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -1373,6 +1373,13 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc // Handle the interpolations switch { + case "${device.ids}" == target: + ids := make([]string, len(d.Instances)) + for i, device := range d.Instances { + ids[i] = device.ID + } + return psstructs.NewStringAttribute(strings.Join(ids, ",")), true + case "${device.model}" == target: return psstructs.NewStringAttribute(d.Name), true diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index d93ab37a5..30fc0df6d 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -2682,6 +2682,11 @@ func TestDeviceChecker(t *testing.T) { LTarget: "${device.attr.cores_clock}", RTarget: "800MHz", }, + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: nvidia.Instances[0].ID, + }, }, }, }, @@ -2715,6 +2720,11 @@ func TestDeviceChecker(t *testing.T) { LTarget: "${device.attr.cores_clock}", RTarget: "800MHz", }, + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: fmt.Sprintf("%s,%s", nvidia.Instances[1].ID, nvidia.Instances[0].ID), + }, }, }, }, @@ -2818,6 +2828,24 @@ func TestDeviceChecker(t *testing.T) { }, }, }, + { + Name: "does not meet ID constraint", + Result: false, + NodeDevices: []*structs.NodeDeviceResource{nvidia}, + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: "not_valid", + }, + }, + }, + }, + }, } for _, c := range cases { diff --git 
a/website/content/docs/job-specification/device.mdx b/website/content/docs/job-specification/device.mdx index 6a975a390..3c97cae10 100644 --- a/website/content/docs/job-specification/device.mdx +++ b/website/content/docs/job-specification/device.mdx @@ -103,6 +103,15 @@ follows: + + + {'${device.ids}'} + + Comma-separated list of device IDs in the group + + 9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4 + + {'${device.type}'} @@ -298,6 +307,23 @@ device "nvidia/gpu" { } ``` +### Affinity Towards Specific GPU Devices + +This example uses affinity to indicate scheduling preference towards specific +GPU devices, using their UUIDs as the selection criteria. Since devices are +fingerprinted as a group, you may specify multiple IDs as a comma-separated +list. + +```hcl +device "nvidia/gpu" { + affinity { + attribute = "${device.ids}" + operator = "set_contains" + value = "9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4" + } +} +``` + [affinity]: /docs/job-specification/affinity 'Nomad affinity Job Specification' [constraint]: /docs/job-specification/constraint 'Nomad constraint Job Specification' [devices]: /docs/devices 'Nomad Device Plugins'