open-nomad/scheduler/preemption_test.go
Seth Hoenig ba728f8f97
api: enable support for setting original job source (#16763)
* api: enable support for setting original source alongside job

This PR adds support for setting job source material along with
the registration of a job.

This includes a new HTTP endpoint and a new RPC endpoint for
making queries for the original source of a job. The
HTTP endpoint is /v1/job/<id>/submission?version=<version> and
the RPC method is Job.GetJobSubmission.

The job source (if submitted, and doing so is always optional), is
stored in the job_submission memdb table, separately from the
actual job. This way we do not incur overhead of reading the large
string field throughout normal job operations.

The server config now includes job_max_source_size for configuring
the maximum size the job source may be, before the server simply
drops the source material. This should help prevent Bad Things from
happening when huge jobs are submitted. If the value is set to 0,
all job source material will be dropped.

* api: avoid writing var content to disk for parsing

* api: move submission validation into RPC layer

* api: return an error if updating a job submission without namespace or job id

* api: be exact about the job index we associate a submission with (modify)

* api: reword api docs scheduling

* api: prune all but the last 6 job submissions

* api: protect against nil job submission in job validation

* api: set max job source size in test server

* api: fixups from pr
2023-04-11 08:45:08 -05:00

1576 lines
39 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package scheduler
import (
"fmt"
"strconv"
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/stretchr/testify/require"
)
func TestResourceDistance(t *testing.T) {
ci.Parallel(t)
resourceAsk := &structs.ComparableResources{
Flattened: structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 2048,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
MBits: 1024,
},
},
},
Shared: structs.AllocatedSharedResources{
DiskMB: 4096,
},
}
type testCase struct {
allocResource *structs.ComparableResources
expectedDistance string
}
testCases := []*testCase{
{
&structs.ComparableResources{
Flattened: structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 2048,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
MBits: 1024,
},
},
},
Shared: structs.AllocatedSharedResources{
DiskMB: 4096,
},
},
"0.000",
},
{
&structs.ComparableResources{
Flattened: structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 400,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
MBits: 1024,
},
},
},
Shared: structs.AllocatedSharedResources{
DiskMB: 1024,
},
},
"0.928",
},
{
&structs.ComparableResources{
Flattened: structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 8192,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 200,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
MBits: 512,
},
},
},
Shared: structs.AllocatedSharedResources{
DiskMB: 1024,
},
},
"3.152",
},
{
&structs.ComparableResources{
Flattened: structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 2048,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 500,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
MBits: 1024,
},
},
},
Shared: structs.AllocatedSharedResources{
DiskMB: 4096,
},
},
"0.023",
},
}
for _, tc := range testCases {
t.Run("", func(t *testing.T) {
require := require.New(t)
actualDistance := fmt.Sprintf("%3.3f", basicResourceDistance(resourceAsk, tc.allocResource))
require.Equal(tc.expectedDistance, actualDistance)
})
}
}
func TestPreemption(t *testing.T) {
ci.Parallel(t)
type testCase struct {
desc string
currentAllocations []*structs.Allocation
nodeReservedCapacity *structs.NodeReservedResources
nodeCapacity *structs.NodeResources
resourceAsk *structs.Resources
jobPriority int
currentPreemptions []*structs.Allocation
preemptedAllocIDs map[string]struct{}
}
highPrioJob := mock.Job()
highPrioJob.Priority = 100
lowPrioJob := mock.Job()
lowPrioJob.Priority = 30
lowPrioJob2 := mock.Job()
lowPrioJob2.Priority = 40
// Create some persistent alloc ids to use in test cases
allocIDs := []string{uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate()}
var deviceIDs []string
for i := 0; i < 10; i++ {
deviceIDs = append(deviceIDs, "dev"+strconv.Itoa(i))
}
defaultNodeResources := &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 4000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 8192,
},
Disk: structs.NodeDiskResources{
DiskMB: 100 * 1024,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
CIDR: "192.168.0.100/32",
MBits: 1000,
},
},
Devices: []*structs.NodeDeviceResource{
{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
Attributes: map[string]*psstructs.Attribute{
"memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB),
"cuda_cores": psstructs.NewIntAttribute(3584, ""),
"graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz),
"memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS),
},
Instances: []*structs.NodeDevice{
{
ID: deviceIDs[0],
Healthy: true,
},
{
ID: deviceIDs[1],
Healthy: true,
},
{
ID: deviceIDs[2],
Healthy: true,
},
{
ID: deviceIDs[3],
Healthy: true,
},
},
},
{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
Attributes: map[string]*psstructs.Attribute{
"memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB),
"cuda_cores": psstructs.NewIntAttribute(3584, ""),
"graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz),
"memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS),
},
Instances: []*structs.NodeDevice{
{
ID: deviceIDs[4],
Healthy: true,
},
{
ID: deviceIDs[5],
Healthy: true,
},
{
ID: deviceIDs[6],
Healthy: true,
},
{
ID: deviceIDs[7],
Healthy: true,
},
{
ID: deviceIDs[8],
Healthy: true,
},
},
},
{
Type: "fpga",
Vendor: "intel",
Name: "F100",
Attributes: map[string]*psstructs.Attribute{
"memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB),
},
Instances: []*structs.NodeDevice{
{
ID: "fpga1",
Healthy: true,
},
{
ID: "fpga2",
Healthy: false,
},
},
},
},
}
reservedNodeResources := &structs.NodeReservedResources{
Cpu: structs.NodeReservedCpuResources{
CpuShares: 100,
},
Memory: structs.NodeReservedMemoryResources{
MemoryMB: 256,
},
Disk: structs.NodeReservedDiskResources{
DiskMB: 4 * 1024,
},
}
testCases := []testCase{
{
desc: "No preemption because existing allocs are not low priority",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 3200,
MemoryMB: 7256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 50,
},
},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 2000,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
ReservedPorts: []structs.Port{{Label: "ssh", Value: 22}},
MBits: 1,
},
},
},
},
{
desc: "Preempting low priority allocs not enough to meet resource ask",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 3200,
MemoryMB: 7256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 50,
},
},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 4000,
MemoryMB: 8192,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
ReservedPorts: []structs.Port{{Label: "ssh", Value: 22}},
MBits: 1,
},
},
},
},
{
desc: "preemption impossible - static port needed is used by higher priority alloc",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], highPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 600,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 600,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 700,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
},
},
{
desc: "preempt only from device that has allocation with unused reserved port",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], highPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth1",
IP: "192.168.0.200",
MBits: 600,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 600,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
// This test sets up a node with two NICs
nodeCapacity: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 4000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 8192,
},
Disk: structs.NodeDiskResources{
DiskMB: 100 * 1024,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
CIDR: "192.168.0.100/32",
MBits: 1000,
},
{
Device: "eth1",
CIDR: "192.168.1.100/32",
MBits: 1000,
},
},
},
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 600,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
IP: "192.168.0.100",
MBits: 700,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[2]: {},
},
},
{
desc: "Combination of high/low priority allocs, without static ports",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 2800,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAllocWithTaskgroupNetwork(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 200,
},
},
}, &structs.NetworkResource{
Device: "eth0",
IP: "192.168.0.201",
MBits: 300,
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 300,
},
},
}),
createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{
CPU: 700,
MemoryMB: 256,
DiskMB: 4 * 1024,
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1100,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 840,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
allocIDs[2]: {},
allocIDs[3]: {},
},
},
{
desc: "preempt allocs with network devices",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 2800,
MemoryMB: 2256,
DiskMB: 4 * 1024,
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 800,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1100,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 840,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
},
},
{
desc: "ignore allocs with close enough priority for network devices",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 2800,
MemoryMB: 2256,
DiskMB: 4 * 1024,
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 800,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: lowPrioJob.Priority + 5,
resourceAsk: &structs.Resources{
CPU: 1100,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 840,
},
},
},
preemptedAllocIDs: nil,
},
{
desc: "Preemption needed for all resources except network",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 2800,
MemoryMB: 2256,
DiskMB: 40 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 50,
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 25 * 1024,
}),
createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{
CPU: 700,
MemoryMB: 276,
DiskMB: 20 * 1024,
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 3000,
DiskMB: 50 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 50,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
allocIDs[2]: {},
allocIDs[3]: {},
},
},
{
desc: "Only one low priority alloc needs to be preempted",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 500,
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 320,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 300,
MemoryMB: 500,
DiskMB: 5 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 320,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[2]: {},
},
},
{
desc: "one alloc meets static port need, another meets remaining mbits needed",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 500,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 200,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 2700,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 800,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
allocIDs[2]: {},
},
},
{
desc: "alloc that meets static port need also meets other needs",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 600,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 100,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 600,
MemoryMB: 1000,
DiskMB: 25 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 700,
ReservedPorts: []structs.Port{
{
Label: "db",
Value: 88,
},
},
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
},
},
{
desc: "alloc from job that has existing evictions not chosen for preemption",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1200,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 500,
},
},
}),
createAlloc(allocIDs[2], lowPrioJob2, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 300,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 300,
MemoryMB: 500,
DiskMB: 5 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 320,
},
},
},
currentPreemptions: []*structs.Allocation{
createAlloc(allocIDs[4], lowPrioJob2, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 300,
},
},
}),
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
},
},
{
desc: "Preemption with one device instance per alloc",
// Add allocations that use two device instances
currentAllocations: []*structs.Allocation{
createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 500,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[0]},
}),
createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[1]},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 512,
DiskMB: 4 * 1024,
Devices: []*structs.RequestedDevice{
{
Name: "nvidia/gpu/1080ti",
Count: 4,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[0]: {},
allocIDs[1]: {},
},
},
{
desc: "Preemption multiple devices used",
currentAllocations: []*structs.Allocation{
createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 500,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]},
}),
createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "fpga",
Vendor: "intel",
Name: "F100",
DeviceIDs: []string{"fpga1"},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 512,
DiskMB: 4 * 1024,
Devices: []*structs.RequestedDevice{
{
Name: "nvidia/gpu/1080ti",
Count: 4,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[0]: {},
},
},
{
// This test cases creates allocations across two GPUs
// Both GPUs are eligible for the task, but only allocs sharing the
// same device should be chosen for preemption
desc: "Preemption with allocs across multiple devices that match",
currentAllocations: []*structs.Allocation{
createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 500,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[0], deviceIDs[1]},
}),
createAllocWithDevice(allocIDs[1], highPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 100,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[2]},
}),
createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
DeviceIDs: []string{deviceIDs[4], deviceIDs[5]},
}),
createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{
CPU: 100,
MemoryMB: 256,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
DeviceIDs: []string{deviceIDs[6], deviceIDs[7]},
}),
createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "fpga",
Vendor: "intel",
Name: "F100",
DeviceIDs: []string{"fpga1"},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 512,
DiskMB: 4 * 1024,
Devices: []*structs.RequestedDevice{
{
Name: "gpu",
Count: 4,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[2]: {},
allocIDs[3]: {},
},
},
{
// This test cases creates allocations across two GPUs
// Both GPUs are eligible for the task, but only allocs with the lower
// priority are chosen
desc: "Preemption with lower/higher priority combinations",
currentAllocations: []*structs.Allocation{
createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 500,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[0], deviceIDs[1]},
}),
createAllocWithDevice(allocIDs[1], lowPrioJob2, &structs.Resources{
CPU: 200,
MemoryMB: 100,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[2], deviceIDs[3]},
}),
createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 256,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
DeviceIDs: []string{deviceIDs[4], deviceIDs[5]},
}),
createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{
CPU: 100,
MemoryMB: 256,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
DeviceIDs: []string{deviceIDs[6], deviceIDs[7]},
}),
createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{
CPU: 100,
MemoryMB: 256,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "2080ti",
DeviceIDs: []string{deviceIDs[8]},
}),
createAllocWithDevice(allocIDs[5], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "fpga",
Vendor: "intel",
Name: "F100",
DeviceIDs: []string{"fpga1"},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 512,
DiskMB: 4 * 1024,
Devices: []*structs.RequestedDevice{
{
Name: "gpu",
Count: 4,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[2]: {},
allocIDs[3]: {},
},
},
{
desc: "Device preemption not possible due to more instances needed than available",
currentAllocations: []*structs.Allocation{
createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{
CPU: 500,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]},
}),
createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 200,
MemoryMB: 512,
DiskMB: 4 * 1024,
}, &structs.AllocatedDeviceResource{
Type: "fpga",
Vendor: "intel",
Name: "F100",
DeviceIDs: []string{"fpga1"},
})},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 512,
DiskMB: 4 * 1024,
Devices: []*structs.RequestedDevice{
{
Name: "gpu",
Count: 6,
},
},
},
},
// This test case exercises the code path for a final filtering step that tries to
// minimize the number of preemptible allocations
{
desc: "Filter out allocs whose resource usage superset is also in the preemption list",
currentAllocations: []*structs.Allocation{
createAlloc(allocIDs[0], highPrioJob, &structs.Resources{
CPU: 1800,
MemoryMB: 2256,
DiskMB: 4 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 150,
},
},
}),
createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{
CPU: 1500,
MemoryMB: 256,
DiskMB: 5 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 100,
},
},
}),
createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{
CPU: 600,
MemoryMB: 256,
DiskMB: 5 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.200",
MBits: 300,
},
},
}),
},
nodeReservedCapacity: reservedNodeResources,
nodeCapacity: defaultNodeResources,
jobPriority: 100,
resourceAsk: &structs.Resources{
CPU: 1000,
MemoryMB: 256,
DiskMB: 5 * 1024,
Networks: []*structs.NetworkResource{
{
Device: "eth0",
IP: "192.168.0.100",
MBits: 50,
},
},
},
preemptedAllocIDs: map[string]struct{}{
allocIDs[1]: {},
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
node := mock.Node()
node.NodeResources = tc.nodeCapacity
node.ReservedResources = tc.nodeReservedCapacity
state, ctx := testContext(t)
nodes := []*RankedNode{
{
Node: node,
},
}
state.UpsertNode(structs.MsgTypeTestSetup, 1000, node)
for _, alloc := range tc.currentAllocations {
alloc.NodeID = node.ID
}
require := require.New(t)
err := state.UpsertAllocs(structs.MsgTypeTestSetup, 1001, tc.currentAllocations)
require.Nil(err)
if tc.currentPreemptions != nil {
ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
}
static := NewStaticRankIterator(ctx, nodes)
binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, testSchedulerConfig)
job := mock.Job()
job.Priority = tc.jobPriority
binPackIter.SetJob(job)
taskGroup := &structs.TaskGroup{
EphemeralDisk: &structs.EphemeralDisk{},
Tasks: []*structs.Task{
{
Name: "web",
Resources: tc.resourceAsk,
},
},
}
binPackIter.SetTaskGroup(taskGroup)
option := binPackIter.Next()
if tc.preemptedAllocIDs == nil {
require.Nil(option)
} else {
require.NotNil(option)
preemptedAllocs := option.PreemptedAllocs
require.Equal(len(tc.preemptedAllocIDs), len(preemptedAllocs))
for _, alloc := range preemptedAllocs {
_, ok := tc.preemptedAllocIDs[alloc.ID]
require.Truef(ok, "alloc %s was preempted unexpectedly", alloc.ID)
}
}
})
}
}
// TestPreemptionMultiple tests evicting multiple allocations in the same time
func TestPreemptionMultiple(t *testing.T) {
ci.Parallel(t)
// The test setup:
// * a node with 4 GPUs
// * a low priority job with 4 allocs, each is using 1 GPU
//
// Then schedule a high priority job needing 2 allocs, using 2 GPUs each.
// Expectation:
// All low priority allocs should preempted to accomodate the high priority job
h := NewHarness(t)
// node with 4 GPUs
node := mock.Node()
node.NodeResources = &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 4000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 8192,
},
Disk: structs.NodeDiskResources{
DiskMB: 100 * 1024,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
CIDR: "192.168.0.100/32",
MBits: 1000,
},
},
Devices: []*structs.NodeDeviceResource{
{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
Attributes: map[string]*psstructs.Attribute{
"memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB),
"cuda_cores": psstructs.NewIntAttribute(3584, ""),
"graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz),
"memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS),
},
Instances: []*structs.NodeDevice{
{
ID: "dev0",
Healthy: true,
},
{
ID: "dev1",
Healthy: true,
},
{
ID: "dev2",
Healthy: true,
},
{
ID: "dev3",
Healthy: true,
},
},
},
},
}
require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
// low priority job with 4 allocs using all 4 GPUs
lowPrioJob := mock.Job()
lowPrioJob.Priority = 5
lowPrioJob.TaskGroups[0].Count = 4
lowPrioJob.TaskGroups[0].Networks = nil
lowPrioJob.TaskGroups[0].Tasks[0].Services = nil
lowPrioJob.TaskGroups[0].Tasks[0].Resources.Networks = nil
lowPrioJob.TaskGroups[0].Tasks[0].Resources.Devices = structs.ResourceDevices{{
Name: "gpu",
Count: 1,
}}
require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, lowPrioJob))
allocs := []*structs.Allocation{}
allocIDs := map[string]struct{}{}
for i := 0; i < 4; i++ {
alloc := createAllocWithDevice(uuid.Generate(), lowPrioJob, lowPrioJob.TaskGroups[0].Tasks[0].Resources, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{fmt.Sprintf("dev%d", i)},
})
alloc.NodeID = node.ID
allocs = append(allocs, alloc)
allocIDs[alloc.ID] = struct{}{}
}
require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
// new high priority job with 2 allocs, each using 2 GPUs
highPrioJob := mock.Job()
highPrioJob.Priority = 100
highPrioJob.TaskGroups[0].Count = 2
highPrioJob.TaskGroups[0].Networks = nil
highPrioJob.TaskGroups[0].Tasks[0].Services = nil
highPrioJob.TaskGroups[0].Tasks[0].Resources.Networks = nil
highPrioJob.TaskGroups[0].Tasks[0].Resources.Devices = structs.ResourceDevices{{
Name: "gpu",
Count: 2,
}}
require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, highPrioJob))
// schedule
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: highPrioJob.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: highPrioJob.ID,
Status: structs.EvalStatusPending,
}
require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
// Process the evaluation
require.NoError(t, h.Process(NewServiceScheduler, eval))
require.Len(t, h.Plans, 1)
require.Contains(t, h.Plans[0].NodePreemptions, node.ID)
preempted := map[string]struct{}{}
for _, alloc := range h.Plans[0].NodePreemptions[node.ID] {
preempted[alloc.ID] = struct{}{}
}
require.Equal(t, allocIDs, preempted)
}
// helper method to create allocations with given jobs and resources
func createAlloc(id string, job *structs.Job, resource *structs.Resources) *structs.Allocation {
return createAllocInner(id, job, resource, nil, nil)
}
// helper method to create allocation with network at the task group level
func createAllocWithTaskgroupNetwork(id string, job *structs.Job, resource *structs.Resources, tgNet *structs.NetworkResource) *structs.Allocation {
return createAllocInner(id, job, resource, nil, tgNet)
}
func createAllocWithDevice(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource) *structs.Allocation {
return createAllocInner(id, job, resource, allocatedDevices, nil)
}
func createAllocInner(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource, tgNetwork *structs.NetworkResource) *structs.Allocation {
alloc := &structs.Allocation{
ID: id,
Job: job,
JobID: job.ID,
TaskResources: map[string]*structs.Resources{
"web": resource,
},
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusRunning,
TaskGroup: "web",
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: int64(resource.CPU),
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: int64(resource.MemoryMB),
},
Networks: resource.Networks,
},
},
},
}
if allocatedDevices != nil {
alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{allocatedDevices}
}
if tgNetwork != nil {
alloc.AllocatedResources.Shared = structs.AllocatedSharedResources{
Networks: []*structs.NetworkResource{tgNetwork},
}
}
return alloc
}