// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package client

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/allocrunner"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/stats"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/require"
)

func gcConfig() *GCConfig {
	return &GCConfig{
		DiskUsageThreshold:  80,
		InodeUsageThreshold: 70,
		Interval:            1 * time.Minute,
		ReservedDiskMB:      0,
		MaxAllocs:           100,
	}
}

// exitAllocRunner is a helper that updates the allocs on the given alloc
// runners to be terminal
func exitAllocRunner(runners ...interfaces.AllocRunner) {
	for _, ar := range runners {
		terminalAlloc := ar.Alloc().Copy()
		terminalAlloc.DesiredStatus = structs.AllocDesiredStatusStop
		ar.Update(terminalAlloc)
	}
}

func TestIndexedGCAllocPQ(t *testing.T) {
	ci.Parallel(t)

	pq := NewIndexedGCAllocPQ()

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()
	ar3, cleanup3 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup3()
	ar4, cleanup4 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup4()

	pq.Push(ar1.Alloc().ID, ar1)
	pq.Push(ar2.Alloc().ID, ar2)
	pq.Push(ar3.Alloc().ID, ar3)
	pq.Push(ar4.Alloc().ID, ar4)

	allocID := pq.Pop().allocRunner.Alloc().ID
	if allocID != ar1.Alloc().ID {
		t.Fatalf("expected alloc %v, got %v", ar1.Alloc().ID, allocID)
	}

	allocID = pq.Pop().allocRunner.Alloc().ID
	if allocID != ar2.Alloc().ID {
		t.Fatalf("expected alloc %v, got %v", ar2.Alloc().ID, allocID)
	}

	allocID = pq.Pop().allocRunner.Alloc().ID
	if allocID != ar3.Alloc().ID {
		t.Fatalf("expected alloc %v, got %v", ar3.Alloc().ID, allocID)
	}

	allocID = pq.Pop().allocRunner.Alloc().ID
	if allocID != ar4.Alloc().ID {
		t.Fatalf("expected alloc %v, got %v", ar4.Alloc().ID, allocID)
	}

	gcAlloc := pq.Pop()
	if gcAlloc != nil {
		t.Fatalf("expected nil, got %v", gcAlloc)
	}
}

// MockAllocCounter implements AllocCounter interface.
type MockAllocCounter struct {
	allocs int
}

func (m *MockAllocCounter) NumAllocs() int {
	return m.allocs
}
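
// MockStatsCollector stubs out the host stats collector used by the garbage
// collector. Stats replays the canned availableValues/usedPercents/inodePercents
// in order, repeating the last entry once the slices are exhausted, and returns
// nil when no values are configured.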
type MockStatsCollector struct {
	availableValues []uint64
	usedPercents    []float64
	inodePercents   []float64
	index           int
}

func (m *MockStatsCollector) Collect() error {
	return nil
}

func (m *MockStatsCollector) Stats() *stats.HostStats {
	if len(m.availableValues) == 0 {
		return nil
	}

	available := m.availableValues[m.index]
	usedPercent := m.usedPercents[m.index]
	inodePercent := m.inodePercents[m.index]

	if m.index < len(m.availableValues)-1 {
		m.index = m.index + 1
	}
	return &stats.HostStats{
		AllocDirStats: &stats.DiskStats{
			Available:         available,
			UsedPercent:       usedPercent,
			InodesUsedPercent: inodePercent,
		},
	}
}

func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)

	gcAlloc := gc.allocRunners.Pop()
	if gcAlloc == nil || gcAlloc.allocRunner != ar1 {
		t.Fatalf("bad gcAlloc: %v", gcAlloc)
	}
}

func TestAllocGarbageCollector_Collect(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	gc.Collect(ar1.Alloc().ID)
	gcAlloc := gc.allocRunners.Pop()
	if gcAlloc == nil || gcAlloc.allocRunner != ar2 {
		t.Fatalf("bad gcAlloc: %v", gcAlloc)
	}
}

func TestAllocGarbageCollector_CollectAll(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	gc.CollectAll()
	gcAlloc := gc.allocRunners.Pop()
	if gcAlloc != nil {
		t.Fatalf("bad gcAlloc: %v", gcAlloc)
	}
}
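
// The MakeRoomForAllocations tests below drive MakeRoomFor against canned disk
// stats to exercise the no-GC, partial-GC, full-GC, and stats-unavailable
// fallback paths without touching the real filesystem.
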
func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	// Make stats collector report 200MB free, out of which 20MB is reserved
	statsCollector.availableValues = []uint64{200 * MB}
	statsCollector.usedPercents = []float64{0}
	statsCollector.inodePercents = []float64{0}

	alloc := mock.Alloc()
	alloc.AllocatedResources.Shared.DiskMB = 150
	if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Since we have enough disk available, no GC is needed, so both ARs
	// should still be in the GC queue
	for i := 0; i < 2; i++ {
		if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
			t.Fatalf("err: %v", gcAlloc)
		}
	}
}

func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	// Make stats collector report 80MB and 175MB free in subsequent calls
	statsCollector.availableValues = []uint64{80 * MB, 80 * MB, 175 * MB}
	statsCollector.usedPercents = []float64{0, 0, 0}
	statsCollector.inodePercents = []float64{0, 0, 0}

	alloc := mock.Alloc()
	alloc.AllocatedResources.Shared.DiskMB = 150
	if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// We should be GC-ing one alloc
	if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
		t.Fatalf("err: %v", gcAlloc)
	}

	if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
		t.Fatalf("gcAlloc: %v", gcAlloc)
	}
}

func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	// Make stats collector report 80MB and 95MB free in subsequent calls
	statsCollector.availableValues = []uint64{80 * MB, 80 * MB, 95 * MB}
	statsCollector.usedPercents = []float64{0, 0, 0}
	statsCollector.inodePercents = []float64{0, 0, 0}

	alloc := mock.Alloc()
	alloc.AllocatedResources.Shared.DiskMB = 150
	if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// We should be GC-ing all the alloc runners
	if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
		t.Fatalf("gcAlloc: %v", gcAlloc)
	}
}

func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	alloc := mock.Alloc()
	alloc.AllocatedResources.Shared.DiskMB = 150
	if err := gc.MakeRoomFor([]*structs.Allocation{alloc}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// We should be GC-ing one alloc
	if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
		t.Fatalf("err: %v", gcAlloc)
	}

	if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
		t.Fatalf("gcAlloc: %v", gcAlloc)
	}
}
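
// Unlike the unit-style tests above, the following test starts a test server
// and a real client, and exercises GC through allocation updates pushed via
// the server state store.
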
// TestAllocGarbageCollector_MakeRoomFor_MaxAllocs asserts that when making room for new
// allocs, terminal allocs are GC'd until old_allocs + new_allocs <= limit
func TestAllocGarbageCollector_MakeRoomFor_MaxAllocs(t *testing.T) {
	ci.Parallel(t)

	const maxAllocs = 6
	require := require.New(t)

	server, serverAddr, cleanupS := testServer(t, nil)
	defer cleanupS()
	testutil.WaitForLeader(t, server.RPC)

	client, cleanup := TestClient(t, func(c *config.Config) {
		c.GCMaxAllocs = maxAllocs
		c.GCDiskUsageThreshold = 100
		c.GCInodeUsageThreshold = 100
		c.GCParallelDestroys = 1
		c.GCInterval = time.Hour

		c.RPCHandler = server
		c.Servers = []string{serverAddr}
		c.ConsulConfig.ClientAutoJoin = new(bool)
	})
	defer cleanup()
	waitTilNodeReady(client, t)

	job := mock.Job()
	job.TaskGroups[0].Count = 1
	job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "30s",
	}

	index := uint64(98)
	nextIndex := func() uint64 {
		index++
		return index
	}

	upsertJobFn := func(server *nomad.Server, j *structs.Job) {
		state := server.State()
		require.NoError(state.UpsertJob(structs.MsgTypeTestSetup, nextIndex(), nil, j))
		require.NoError(state.UpsertJobSummary(nextIndex(), mock.JobSummary(j.ID)))
	}

	// Insert the Job
	upsertJobFn(server, job)

	upsertAllocFn := func(server *nomad.Server, a *structs.Allocation) {
		state := server.State()
		require.NoError(state.UpsertAllocs(structs.MsgTypeTestSetup, nextIndex(), []*structs.Allocation{a}))
	}

	upsertNewAllocFn := func(server *nomad.Server, j *structs.Job) *structs.Allocation {
		alloc := mock.Alloc()
		alloc.Job = j
		alloc.JobID = j.ID
		alloc.NodeID = client.NodeID()

		upsertAllocFn(server, alloc)

		return alloc.Copy()
	}

	var allocations []*structs.Allocation

	// Fill the node with allocations
	for i := 0; i < maxAllocs; i++ {
		allocations = append(allocations, upsertNewAllocFn(server, job))
	}

	// Wait until the allocations are ready
	testutil.WaitForResult(func() (bool, error) {
		ar := len(client.getAllocRunners())

		return ar == maxAllocs, fmt.Errorf("Expected %d allocs, got %d", maxAllocs, ar)
	}, func(err error) {
		t.Fatalf("Allocs did not start: %v", err)
	})

	// Mark the first three as terminal
	for i := 0; i < 3; i++ {
		allocations[i].DesiredStatus = structs.AllocDesiredStatusStop
		upsertAllocFn(server, allocations[i].Copy())
	}

	// Wait until the allocations are stopped
	testutil.WaitForResult(func() (bool, error) {
		ar := client.getAllocRunners()
		stopped := 0
		for _, r := range ar {
			if r.Alloc().TerminalStatus() {
				stopped++
			}
		}

		return stopped == 3, fmt.Errorf("Expected %d terminal allocs, got %d", 3, stopped)
	}, func(err error) {
		t.Fatalf("Allocs did not terminate: %v", err)
	})

	// Upsert a new allocation
	// This does not get appended to `allocations` as we do not use them again.
	upsertNewAllocFn(server, job)

	// A single allocation should be GC'd
	testutil.WaitForResult(func() (bool, error) {
		ar := client.getAllocRunners()
		destroyed := 0
		for _, r := range ar {
			if r.IsDestroyed() {
				destroyed++
			}
		}

		return destroyed == 1, fmt.Errorf("Expected %d gc'd ars, got %d", 1, destroyed)
	}, func(err error) {
		t.Fatalf("Allocs did not get GC'd: %v", err)
	})

	// Upsert a new allocation
	// This does not get appended to `allocations` as we do not use them again.
	upsertNewAllocFn(server, job)

	// 2 allocations should be GC'd
	testutil.WaitForResult(func() (bool, error) {
		ar := client.getAllocRunners()
		destroyed := 0
		for _, r := range ar {
			if r.IsDestroyed() {
				destroyed++
			}
		}

		return destroyed == 2, fmt.Errorf("Expected %d gc'd ars, got %d", 2, destroyed)
	}, func(err error) {
		t.Fatalf("Allocs did not get GC'd: %v", err)
	})

	// check that all 8 get run eventually
	testutil.WaitForResult(func() (bool, error) {
		ar := client.getAllocRunners()
		if len(ar) != 8 {
			return false, fmt.Errorf("expected 8 ARs, found %d: %v", len(ar), ar)
		}
		return true, nil
	}, func(err error) {
		require.NoError(err)
	})
}

func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	statsCollector.availableValues = []uint64{1000}
	statsCollector.usedPercents = []float64{20}
	statsCollector.inodePercents = []float64{10}

	if err := gc.keepUsageBelowThreshold(); err != nil {
		t.Fatalf("err: %v", err)
	}

	// We shouldn't GC any of the allocs since the used percent values are below
	// threshold
	for i := 0; i < 2; i++ {
		if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
			t.Fatalf("err: %v", gcAlloc)
		}
	}
}

func TestAllocGarbageCollector_UsedPercentThreshold(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	statsCollector := &MockStatsCollector{}
	conf := gcConfig()
	conf.ReservedDiskMB = 20
	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)

	ar1, cleanup1 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup1()
	ar2, cleanup2 := allocrunner.TestAllocRunnerFromAlloc(t, mock.Alloc())
	defer cleanup2()

	go ar1.Run()
	go ar2.Run()

	gc.MarkForCollection(ar1.Alloc().ID, ar1)
	gc.MarkForCollection(ar2.Alloc().ID, ar2)

	// Exit the alloc runners
	exitAllocRunner(ar1, ar2)

	statsCollector.availableValues = []uint64{1000, 800}
	statsCollector.usedPercents = []float64{85, 60}
	statsCollector.inodePercents = []float64{50, 30}

	if err := gc.keepUsageBelowThreshold(); err != nil {
		t.Fatalf("err: %v", err)
	}

	// We should be GC-ing only one of the alloc runners since the second time
	// used percent returns a number below threshold.
	if gcAlloc := gc.allocRunners.Pop(); gcAlloc == nil {
		t.Fatalf("err: %v", gcAlloc)
	}

	if gcAlloc := gc.allocRunners.Pop(); gcAlloc != nil {
		t.Fatalf("gcAlloc: %v", gcAlloc)
	}
}