open-nomad/client/gc_test.go
Tim Gross d018fcbff7
allocrunner: provide factory function so we can build mock ARs (#17161)
Tools like `nomad-nodesim` are unable to implement a minimal implementation of
an allocrunner so that we can test the client communication without having to
lug around the entire allocrunner/taskrunner code base. The allocrunner was
implemented with an interface specifically for this purpose, but there were
circular imports that made it challenging to use in practice.

Move the AllocRunner interface into an inner package and provide a factory
function type. Provide a minimal test that exercises the new function so that
consumers have some idea of what the minimum implementation required is.
2023-05-12 13:29:44 -04:00

592 lines
16 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package client
import (
"fmt"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocrunner"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/stats"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)
func gcConfig() *GCConfig {
return &GCConfig{
DiskUsageThreshold: 80,
InodeUsageThreshold: 70,
Interval: 1 * time.Minute,
ReservedDiskMB: 0,
MaxAllocs: 100,
}
}
// exitAllocRunner is a helper that updates the allocs on the given alloc
// runners to be terminal
func exitAllocRunner(runners ...interfaces.AllocRunner) {
for _, ar := range runners {
terminalAlloc := ar.Alloc().Copy()
terminalAlloc.DesiredStatus = structs.AllocDesiredStatusStop
ar.Update(terminalAlloc)
}
}
func TestIndexedGCAllocPQ(t *testing.T) {
ci.Parallel(t)
pq := NewIndexedGCAllocPQ()
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
ar3, cleanup3 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup3()
ar4, cleanup4 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup4()
pq.Push(ar1.Alloc().ID, ar1)
pq.Push(ar2.Alloc().ID, ar2)
pq.Push(ar3.Alloc().ID, ar3)
pq.Push(ar4.Alloc().ID, ar4)
allocID := pq.Pop().allocRunner.Alloc().ID
if allocID != ar1.Alloc().ID {
t.Fatalf("expected alloc %v, got %v", allocID, ar1.Alloc().ID)
}
allocID = pq.Pop().allocRunner.Alloc().ID
if allocID != ar2.Alloc().ID {
t.Fatalf("expected alloc %v, got %v", allocID, ar1.Alloc().ID)
}
allocID = pq.Pop().allocRunner.Alloc().ID
if allocID != ar3.Alloc().ID {
t.Fatalf("expected alloc %v, got %v", allocID, ar1.Alloc().ID)
}
allocID = pq.Pop().allocRunner.Alloc().ID
if allocID != ar4.Alloc().ID {
t.Fatalf("expected alloc %v, got %v", allocID, ar1.Alloc().ID)
}
gcAlloc := pq.Pop()
if gcAlloc != nil {
t.Fatalf("expected nil, got %v", gcAlloc)
}
}
// MockAllocCounter implements AllocCounter interface.
type MockAllocCounter struct {
allocs int
}
func (m *MockAllocCounter) NumAllocs() int {
return m.allocs
}
type MockStatsCollector struct {
availableValues []uint64
usedPercents []float64
inodePercents []float64
index int
}
func (m *MockStatsCollector) Collect() error {
return nil
}
func (m *MockStatsCollector) Stats() *stats.HostStats {
if len(m.availableValues) == 0 {
return nil
}
available := m.availableValues[m.index]
usedPercent := m.usedPercents[m.index]
inodePercent := m.inodePercents[m.index]
if m.index < len(m.availableValues)-1 {
m.index = m.index + 1
}
return &stats.HostStats{
AllocDirStats: &stats.DiskStats{
Available: available,
UsedPercent: usedPercent,
InodesUsedPercent: inodePercent,
},
}
}
func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gcAlloc := gc.allocRunners.Pop()
if gcAlloc == nil || gcAlloc.allocRunner != ar1 {
t.Fatalf("bad gcAlloc: %v", gcAlloc)
}
}
func TestAllocGarbageCollector_Collect(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
gc.Collect(ar1.Alloc().ID)
gcAlloc := gc.allocRunners.Pop()
if gcAlloc == nil || gcAlloc.allocRunner != ar2 {
t.Fatalf("bad gcAlloc: %v", gcAlloc)
}
}
func TestAllocGarbageCollector_CollectAll(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
gc.CollectAll()
gcAlloc := gc.allocRunners.Pop()
if gcAlloc != nil {
t.Fatalf("bad gcAlloc: %v", gcAlloc)
}
}
func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
// Make stats collector report 200MB free out of which 20MB is reserved
statsCollector.availableValues = []uint64{200 * MB}
statsCollector.usedPercents = []float64{0}
statsCollector.inodePercents = []float64{0}
alloc := mock.Alloc()
alloc.AllocatedResources.Shared.DiskMB = 150
if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
// When we have enough disk available and don't need to do any GC so we
// should have two ARs in the GC queue
for i := 0; i < 2; i++ {
if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
t.Fatalf("err: %v", gcAlloc)
}
}
}
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
// Make stats collector report 80MB and 175MB free in subsequent calls
statsCollector.availableValues = []uint64{80 * MB, 80 * MB, 175 * MB}
statsCollector.usedPercents = []float64{0, 0, 0}
statsCollector.inodePercents = []float64{0, 0, 0}
alloc := mock.Alloc()
alloc.AllocatedResources.Shared.DiskMB = 150
if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
// We should be GC-ing one alloc
if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
t.Fatalf("err: %v", gcAlloc)
}
if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
t.Fatalf("gcAlloc: %v", gcAlloc)
}
}
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
// Make stats collector report 80MB and 95MB free in subsequent calls
statsCollector.availableValues = []uint64{80 * MB, 80 * MB, 95 * MB}
statsCollector.usedPercents = []float64{0, 0, 0}
statsCollector.inodePercents = []float64{0, 0, 0}
alloc := mock.Alloc()
alloc.AllocatedResources.Shared.DiskMB = 150
if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
// We should be GC-ing all the alloc runners
if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
t.Fatalf("gcAlloc: %v", gcAlloc)
}
}
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
alloc := mock.Alloc()
alloc.AllocatedResources.Shared.DiskMB = 150
if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
t.Fatalf("err: %v", err)
}
// We should be GC-ing one alloc
if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
t.Fatalf("err: %v", gcAlloc)
}
if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
t.Fatalf("gcAlloc: %v", gcAlloc)
}
}
// TestAllocGarbageCollector_MakeRoomFor_MaxAllocs asserts that when making room for new
// allocs, terminal allocs are GC'd until old_allocs + new_allocs <= limit
func TestAllocGarbageCollector_MakeRoomFor_MaxAllocs(t *testing.T) {
ci.Parallel(t)
const maxAllocs = 6
require := require.New(t)
server, serverAddr, cleanupS := testServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, server.RPC)
client, cleanup := TestClient(t, func(c *config.Config) {
c.GCMaxAllocs = maxAllocs
c.GCDiskUsageThreshold = 100
c.GCInodeUsageThreshold = 100
c.GCParallelDestroys = 1
c.GCInterval = time.Hour
c.RPCHandler = server
c.Servers = []string{serverAddr}
c.ConsulConfig.ClientAutoJoin = new(bool)
})
defer cleanup()
waitTilNodeReady(client, t)
job := mock.Job()
job.TaskGroups[0].Count = 1
job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
"run_for": "30s",
}
index := uint64(98)
nextIndex := func() uint64 {
index++
return index
}
upsertJobFn := func(server *nomad.Server, j *structs.Job) {
state := server.State()
require.NoError(state.UpsertJob(structs.MsgTypeTestSetup, nextIndex(), nil, j))
require.NoError(state.UpsertJobSummary(nextIndex(), mock.JobSummary(j.ID)))
}
// Insert the Job
upsertJobFn(server, job)
upsertAllocFn := func(server *nomad.Server, a *structs.Allocation) {
state := server.State()
require.NoError(state.UpsertAllocs(structs.MsgTypeTestSetup, nextIndex(), []*structs.Allocation{a}))
}
upsertNewAllocFn := func(server *nomad.Server, j *structs.Job) *structs.Allocation {
alloc := mock.Alloc()
alloc.Job = j
alloc.JobID = j.ID
alloc.NodeID = client.NodeID()
upsertAllocFn(server, alloc)
return alloc.Copy()
}
var allocations []*structs.Allocation
// Fill the node with allocations
for i := 0; i < maxAllocs; i++ {
allocations = append(allocations, upsertNewAllocFn(server, job))
}
// Wait until the allocations are ready
testutil.WaitForResult(func() (bool, error) {
ar := len(client.getAllocRunners())
return ar == maxAllocs, fmt.Errorf("Expected %d allocs, got %d", maxAllocs, ar)
}, func(err error) {
t.Fatalf("Allocs did not start: %v", err)
})
// Mark the first three as terminal
for i := 0; i < 3; i++ {
allocations[i].DesiredStatus = structs.AllocDesiredStatusStop
upsertAllocFn(server, allocations[i].Copy())
}
// Wait until the allocations are stopped
testutil.WaitForResult(func() (bool, error) {
ar := client.getAllocRunners()
stopped := 0
for _, r := range ar {
if r.Alloc().TerminalStatus() {
stopped++
}
}
return stopped == 3, fmt.Errorf("Expected %d terminal allocs, got %d", 3, stopped)
}, func(err error) {
t.Fatalf("Allocs did not terminate: %v", err)
})
// Upsert a new allocation
// This does not get appended to `allocations` as we do not use them again.
upsertNewAllocFn(server, job)
// A single allocation should be GC'd
testutil.WaitForResult(func() (bool, error) {
ar := client.getAllocRunners()
destroyed := 0
for _, r := range ar {
if r.IsDestroyed() {
destroyed++
}
}
return destroyed == 1, fmt.Errorf("Expected %d gc'd ars, got %d", 1, destroyed)
}, func(err error) {
t.Fatalf("Allocs did not get GC'd: %v", err)
})
// Upsert a new allocation
// This does not get appended to `allocations` as we do not use them again.
upsertNewAllocFn(server, job)
// 2 allocations should be GC'd
testutil.WaitForResult(func() (bool, error) {
ar := client.getAllocRunners()
destroyed := 0
for _, r := range ar {
if r.IsDestroyed() {
destroyed++
}
}
return destroyed == 2, fmt.Errorf("Expected %d gc'd ars, got %d", 2, destroyed)
}, func(err error) {
t.Fatalf("Allocs did not get GC'd: %v", err)
})
// check that all 8 get run eventually
testutil.WaitForResult(func() (bool, error) {
ar := client.getAllocRunners()
if len(ar) != 8 {
return false, fmt.Errorf("expected 8 ARs, found %d: %v", len(ar), ar)
}
return true, nil
}, func(err error) {
require.NoError(err)
})
}
func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
statsCollector.availableValues = []uint64{1000}
statsCollector.usedPercents = []float64{20}
statsCollector.inodePercents = []float64{10}
if err := gc.keepUsageBelowThreshold(); err != nil {
t.Fatalf("err: %v", err)
}
// We shouldn't GC any of the allocs since the used percent values are below
// threshold
for i := 0; i < 2; i++ {
if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
t.Fatalf("err: %v", gcAlloc)
}
}
}
func TestAllocGarbageCollector_UsedPercentThreshold(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
statsCollector := &MockStatsCollector{}
conf := gcConfig()
conf.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup1()
ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
defer cleanup2()
go ar1.Run()
go ar2.Run()
gc.MarkForCollection(ar1.Alloc().ID, ar1)
gc.MarkForCollection(ar2.Alloc().ID, ar2)
// Exit the alloc runners
exitAllocRunner(ar1, ar2)
statsCollector.availableValues = []uint64{1000, 800}
statsCollector.usedPercents = []float64{85, 60}
statsCollector.inodePercents = []float64{50, 30}
if err := gc.keepUsageBelowThreshold(); err != nil {
t.Fatalf("err: %v", err)
}
// We should be GC-ing only one of the alloc runners since the second time
// used percent returns a number below threshold.
if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
t.Fatalf("err: %v", gcAlloc)
}
if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
t.Fatalf("gcAlloc: %v", gcAlloc)
}
}