open-nomad/nomad/structs/funcs_test.go
Michael Schurter 4c5a0cae35 core: fix node reservation scoring
The BinPackIter accounted for node reservations twice when scoring nodes
which could bias scores toward nodes with reservations.

Pseudo-code for previous algorithm:
```
	proposed  = reservedResources + sum(allocsResources)
	available = nodeResources - reservedResources
	score     = 1 - (proposed / available)
```

The node's reserved resources are added to the total resources used by
allocations, and then the node's reserved resources are later
substracted from the node's overall resources.

The new algorithm is:
```
	proposed  = sum(allocResources)
	available = nodeResources - reservedResources
	score     = 1 - (proposed / available)
```

The node's reserved resources are no longer added to the total resources
used by allocations.

My guess as to how this bug happened is that the resource utilization
variable (`util`) is calculated and returned by the `AllocsFit` function
which needs to take reserved resources into account as a basic
feasibility check.

To avoid re-calculating alloc resource usage (because there may be a
large number of allocs), we reused `util` in the `ScoreFit` function.
`ScoreFit` properly accounts for reserved resources by subtracting them
from the node's overall resources. However since `util` _also_ took
reserved resources into account the score would be incorrect.

Prior to the fix the added test output:
```
Node: reserved     Score: 1.0000
Node: reserved2    Score: 1.0000
Node: no-reserved  Score: 0.9741
```

The scores being 1.0 for *both* nodes with reserved resources is a good
hint something is wrong as they should receive different scores. Upon
further inspection the double accounting of reserved resources caused
their scores to be >1.0 and clamped.

After the fix the added test outputs:
```
Node: no-reserved  Score: 0.9741
Node: reserved     Score: 0.9480
Node: reserved2    Score: 0.8717
```
2020-04-15 15:13:30 -07:00

761 lines
16 KiB
Go

package structs
import (
"encoding/base64"
"fmt"
"testing"
lru "github.com/hashicorp/golang-lru"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestRemoveAllocs(t *testing.T) {
l := []*Allocation{
{ID: "foo"},
{ID: "bar"},
{ID: "baz"},
{ID: "zip"},
}
out := RemoveAllocs(l, []*Allocation{l[1], l[3]})
if len(out) != 2 {
t.Fatalf("bad: %#v", out)
}
if out[0].ID != "foo" && out[1].ID != "baz" {
t.Fatalf("bad: %#v", out)
}
}
func TestFilterTerminalAllocs(t *testing.T) {
l := []*Allocation{
{
ID: "bar",
Name: "myname1",
DesiredStatus: AllocDesiredStatusEvict,
},
{ID: "baz", DesiredStatus: AllocDesiredStatusStop},
{
ID: "foo",
DesiredStatus: AllocDesiredStatusRun,
ClientStatus: AllocClientStatusPending,
},
{
ID: "bam",
Name: "myname",
DesiredStatus: AllocDesiredStatusRun,
ClientStatus: AllocClientStatusComplete,
CreateIndex: 5,
},
{
ID: "lol",
Name: "myname",
DesiredStatus: AllocDesiredStatusRun,
ClientStatus: AllocClientStatusComplete,
CreateIndex: 2,
},
}
out, terminalAllocs := FilterTerminalAllocs(l)
if len(out) != 1 {
t.Fatalf("bad: %#v", out)
}
if out[0].ID != "foo" {
t.Fatalf("bad: %#v", out)
}
if len(terminalAllocs) != 3 {
for _, o := range terminalAllocs {
fmt.Printf("%#v \n", o)
}
t.Fatalf("bad: %#v", terminalAllocs)
}
if terminalAllocs["myname"].ID != "bam" {
t.Fatalf("bad: %#v", terminalAllocs["myname"])
}
}
// COMPAT(0.11): Remove in 0.11
func TestAllocsFit_PortsOvercommitted_Old(t *testing.T) {
n := &Node{
Resources: &Resources{
Networks: []*NetworkResource{
{
Device: "eth0",
CIDR: "10.0.0.0/8",
MBits: 100,
},
},
},
}
a1 := &Allocation{
Job: &Job{
TaskGroups: []*TaskGroup{
{
Name: "web",
EphemeralDisk: DefaultEphemeralDisk(),
},
},
},
TaskResources: map[string]*Resources{
"web": {
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 8000, 80}},
},
},
},
},
}
// Should fit one allocation
fit, dim, _, err := AllocsFit(n, []*Allocation{a1}, nil, false)
if err != nil {
t.Fatalf("err: %v", err)
}
if !fit {
t.Fatalf("Bad: %s", dim)
}
// Should not fit second allocation
fit, _, _, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
if err != nil {
t.Fatalf("err: %v", err)
}
if fit {
t.Fatalf("Bad")
}
}
// COMPAT(0.11): Remove in 0.11
func TestAllocsFit_Old(t *testing.T) {
require := require.New(t)
n := &Node{
Resources: &Resources{
CPU: 2000,
MemoryMB: 2048,
DiskMB: 10000,
Networks: []*NetworkResource{
{
Device: "eth0",
CIDR: "10.0.0.0/8",
MBits: 100,
},
},
},
Reserved: &Resources{
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 80, 0}},
},
},
},
}
a1 := &Allocation{
Resources: &Resources{
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 8000, 80}},
},
},
},
}
// Should fit one allocation
fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false)
require.NoError(err)
require.True(fit)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
// Should not fit second allocation
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
require.NoError(err)
require.False(fit)
// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
}
// COMPAT(0.11): Remove in 0.11
func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
require := require.New(t)
n := &Node{
Resources: &Resources{
CPU: 2000,
MemoryMB: 2048,
DiskMB: 10000,
Networks: []*NetworkResource{
{
Device: "eth0",
CIDR: "10.0.0.0/8",
MBits: 100,
},
},
},
Reserved: &Resources{
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 80, 0}},
},
},
},
}
a1 := &Allocation{
Resources: &Resources{
CPU: 1000,
MemoryMB: 1024,
DiskMB: 5000,
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 8000, 0}},
},
},
},
}
// Should fit one allocation
fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false)
require.NoError(err)
require.True(fit)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
// Should fit second allocation since it is terminal
a2 := a1.Copy()
a2.DesiredStatus = AllocDesiredStatusStop
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a2}, nil, false)
require.NoError(err)
require.True(fit)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
}
func TestAllocsFit(t *testing.T) {
require := require.New(t)
n := &Node{
NodeResources: &NodeResources{
Cpu: NodeCpuResources{
CpuShares: 2000,
},
Memory: NodeMemoryResources{
MemoryMB: 2048,
},
Disk: NodeDiskResources{
DiskMB: 10000,
},
Networks: []*NetworkResource{
{
Device: "eth0",
CIDR: "10.0.0.0/8",
MBits: 100,
},
},
},
ReservedResources: &NodeReservedResources{
Cpu: NodeReservedCpuResources{
CpuShares: 1000,
},
Memory: NodeReservedMemoryResources{
MemoryMB: 1024,
},
Disk: NodeReservedDiskResources{
DiskMB: 5000,
},
Networks: NodeReservedNetworkResources{
ReservedHostPorts: "80",
},
},
}
a1 := &Allocation{
AllocatedResources: &AllocatedResources{
Tasks: map[string]*AllocatedTaskResources{
"web": {
Cpu: AllocatedCpuResources{
CpuShares: 1000,
},
Memory: AllocatedMemoryResources{
MemoryMB: 1024,
},
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 8000, 0}},
},
},
},
},
Shared: AllocatedSharedResources{
DiskMB: 5000,
},
},
}
// Should fit one allocation
fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false)
require.NoError(err)
require.True(fit)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
// Should not fit second allocation
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
require.NoError(err)
require.False(fit)
// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
}
func TestAllocsFit_TerminalAlloc(t *testing.T) {
require := require.New(t)
n := &Node{
NodeResources: &NodeResources{
Cpu: NodeCpuResources{
CpuShares: 2000,
},
Memory: NodeMemoryResources{
MemoryMB: 2048,
},
Disk: NodeDiskResources{
DiskMB: 10000,
},
Networks: []*NetworkResource{
{
Device: "eth0",
CIDR: "10.0.0.0/8",
IP: "10.0.0.1",
MBits: 100,
},
},
},
ReservedResources: &NodeReservedResources{
Cpu: NodeReservedCpuResources{
CpuShares: 1000,
},
Memory: NodeReservedMemoryResources{
MemoryMB: 1024,
},
Disk: NodeReservedDiskResources{
DiskMB: 5000,
},
Networks: NodeReservedNetworkResources{
ReservedHostPorts: "80",
},
},
}
a1 := &Allocation{
AllocatedResources: &AllocatedResources{
Tasks: map[string]*AllocatedTaskResources{
"web": {
Cpu: AllocatedCpuResources{
CpuShares: 1000,
},
Memory: AllocatedMemoryResources{
MemoryMB: 1024,
},
Networks: []*NetworkResource{
{
Device: "eth0",
IP: "10.0.0.1",
MBits: 50,
ReservedPorts: []Port{{"main", 8000, 80}},
},
},
},
},
Shared: AllocatedSharedResources{
DiskMB: 5000,
},
},
}
// Should fit one allocation
fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false)
require.NoError(err)
require.True(fit)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
// Should fit second allocation since it is terminal
a2 := a1.Copy()
a2.DesiredStatus = AllocDesiredStatusStop
fit, dim, used, err := AllocsFit(n, []*Allocation{a1, a2}, nil, false)
require.NoError(err)
require.True(fit, dim)
// Sanity check the used resources
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
}
// Tests that AllocsFit detects device collisions
func TestAllocsFit_Devices(t *testing.T) {
require := require.New(t)
n := MockNvidiaNode()
a1 := &Allocation{
AllocatedResources: &AllocatedResources{
Tasks: map[string]*AllocatedTaskResources{
"web": {
Cpu: AllocatedCpuResources{
CpuShares: 1000,
},
Memory: AllocatedMemoryResources{
MemoryMB: 1024,
},
Devices: []*AllocatedDeviceResource{
{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{n.NodeResources.Devices[0].Instances[0].ID},
},
},
},
},
Shared: AllocatedSharedResources{
DiskMB: 5000,
},
},
}
a2 := a1.Copy()
a2.AllocatedResources.Tasks["web"] = &AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 1000,
},
Memory: AllocatedMemoryResources{
MemoryMB: 1024,
},
Devices: []*AllocatedDeviceResource{
{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{n.NodeResources.Devices[0].Instances[0].ID}, // Use the same ID
},
},
}
// Should fit one allocation
fit, _, _, err := AllocsFit(n, []*Allocation{a1}, nil, true)
require.NoError(err)
require.True(fit)
// Should not fit second allocation
fit, msg, _, err := AllocsFit(n, []*Allocation{a1, a2}, nil, true)
require.NoError(err)
require.False(fit)
require.Equal("device oversubscribed", msg)
// Should not fit second allocation but won't detect since we disabled
// devices
fit, _, _, err = AllocsFit(n, []*Allocation{a1, a2}, nil, false)
require.NoError(err)
require.True(fit)
}
// COMPAT(0.11): Remove in 0.11
func TestScoreFit_Old(t *testing.T) {
node := &Node{}
node.Resources = &Resources{
CPU: 4096,
MemoryMB: 8192,
}
node.Reserved = &Resources{
CPU: 2048,
MemoryMB: 4096,
}
// Test a perfect fit
util := &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 2048,
},
Memory: AllocatedMemoryResources{
MemoryMB: 4096,
},
},
}
score := ScoreFit(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}
// Test the worst fit
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 0,
},
Memory: AllocatedMemoryResources{
MemoryMB: 0,
},
},
}
score = ScoreFit(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}
// Test a mid-case scenario
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 1024,
},
Memory: AllocatedMemoryResources{
MemoryMB: 2048,
},
},
}
score = ScoreFit(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)
}
}
func TestScoreFit(t *testing.T) {
node := &Node{}
node.NodeResources = &NodeResources{
Cpu: NodeCpuResources{
CpuShares: 4096,
},
Memory: NodeMemoryResources{
MemoryMB: 8192,
},
}
node.ReservedResources = &NodeReservedResources{
Cpu: NodeReservedCpuResources{
CpuShares: 2048,
},
Memory: NodeReservedMemoryResources{
MemoryMB: 4096,
},
}
// Test a perfect fit
util := &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 2048,
},
Memory: AllocatedMemoryResources{
MemoryMB: 4096,
},
},
}
score := ScoreFit(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}
// Test the worst fit
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 0,
},
Memory: AllocatedMemoryResources{
MemoryMB: 0,
},
},
}
score = ScoreFit(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}
// Test a mid-case scenario
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 1024,
},
Memory: AllocatedMemoryResources{
MemoryMB: 2048,
},
},
}
score = ScoreFit(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)
}
}
func TestACLPolicyListHash(t *testing.T) {
h1 := ACLPolicyListHash(nil)
assert.NotEqual(t, "", h1)
p1 := &ACLPolicy{
Name: fmt.Sprintf("policy-%s", uuid.Generate()),
Description: "Super cool policy!",
Rules: `
namespace "default" {
policy = "write"
}
node {
policy = "read"
}
agent {
policy = "read"
}
`,
CreateIndex: 10,
ModifyIndex: 20,
}
h2 := ACLPolicyListHash([]*ACLPolicy{p1})
assert.NotEqual(t, "", h2)
assert.NotEqual(t, h1, h2)
// Create P2 as copy of P1 with new name
p2 := &ACLPolicy{}
*p2 = *p1
p2.Name = fmt.Sprintf("policy-%s", uuid.Generate())
h3 := ACLPolicyListHash([]*ACLPolicy{p1, p2})
assert.NotEqual(t, "", h3)
assert.NotEqual(t, h2, h3)
h4 := ACLPolicyListHash([]*ACLPolicy{p2})
assert.NotEqual(t, "", h4)
assert.NotEqual(t, h3, h4)
// ModifyIndex should change the hash
p2.ModifyIndex++
h5 := ACLPolicyListHash([]*ACLPolicy{p2})
assert.NotEqual(t, "", h5)
assert.NotEqual(t, h4, h5)
}
func TestCompileACLObject(t *testing.T) {
p1 := &ACLPolicy{
Name: fmt.Sprintf("policy-%s", uuid.Generate()),
Description: "Super cool policy!",
Rules: `
namespace "default" {
policy = "write"
}
node {
policy = "read"
}
agent {
policy = "read"
}
`,
CreateIndex: 10,
ModifyIndex: 20,
}
// Create P2 as copy of P1 with new name
p2 := &ACLPolicy{}
*p2 = *p1
p2.Name = fmt.Sprintf("policy-%s", uuid.Generate())
// Create a small cache
cache, err := lru.New2Q(16)
assert.Nil(t, err)
// Test compilation
aclObj, err := CompileACLObject(cache, []*ACLPolicy{p1})
assert.Nil(t, err)
assert.NotNil(t, aclObj)
// Should get the same object
aclObj2, err := CompileACLObject(cache, []*ACLPolicy{p1})
assert.Nil(t, err)
if aclObj != aclObj2 {
t.Fatalf("expected the same object")
}
// Should get another object
aclObj3, err := CompileACLObject(cache, []*ACLPolicy{p1, p2})
assert.Nil(t, err)
assert.NotNil(t, aclObj3)
if aclObj == aclObj3 {
t.Fatalf("unexpected same object")
}
// Should be order independent
aclObj4, err := CompileACLObject(cache, []*ACLPolicy{p2, p1})
assert.Nil(t, err)
assert.NotNil(t, aclObj4)
if aclObj3 != aclObj4 {
t.Fatalf("expected same object")
}
}
// TestGenerateMigrateToken asserts the migrate token is valid for use in HTTP
// headers and CompareMigrateToken works as expected.
func TestGenerateMigrateToken(t *testing.T) {
assert := assert.New(t)
allocID := uuid.Generate()
nodeSecret := uuid.Generate()
token, err := GenerateMigrateToken(allocID, nodeSecret)
assert.Nil(err)
_, err = base64.URLEncoding.DecodeString(token)
assert.Nil(err)
assert.True(CompareMigrateToken(allocID, nodeSecret, token))
assert.False(CompareMigrateToken("x", nodeSecret, token))
assert.False(CompareMigrateToken(allocID, "x", token))
assert.False(CompareMigrateToken(allocID, nodeSecret, "x"))
token2, err := GenerateMigrateToken("x", nodeSecret)
assert.Nil(err)
assert.False(CompareMigrateToken(allocID, nodeSecret, token2))
assert.True(CompareMigrateToken("x", nodeSecret, token2))
}