459 lines
12 KiB
Go
459 lines
12 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package client
|
|
|
|
import (
|
|
"container/heap"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
hclog "github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
|
|
"github.com/hashicorp/nomad/client/stats"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
const (
	// MB is the number of bytes in one megabyte (2^20). Multiplying a size
	// expressed in MB by this constant yields the same size in bytes.
	MB = 1 << 20
)
|
|
|
|
// GCConfig allows changing the behaviour of the garbage collector
type GCConfig struct {
	// MaxAllocs is the maximum number of allocations to track before a GC
	// is triggered.
	MaxAllocs int

	// DiskUsageThreshold is compared against the alloc dir's UsedPercent
	// stat; usage above it triggers a GC.
	DiskUsageThreshold float64

	// InodeUsageThreshold is compared against the alloc dir's
	// InodesUsedPercent stat; usage above it triggers a GC.
	InodeUsageThreshold float64

	// Interval is the period of the ticker that drives periodic GC passes
	// in Run.
	Interval time.Duration

	// ReservedDiskMB is an amount of disk, in MB, that MakeRoomFor subtracts
	// from the available alloc dir space before deciding whether new
	// allocations fit without a GC.
	ReservedDiskMB int

	// ParallelDestroys bounds how many alloc runners may be destroyed
	// concurrently. NewAllocGarbageCollector replaces values <= 0 with 1.
	ParallelDestroys int
}
|
|
|
|
// AllocCounter is used by AllocGarbageCollector to discover how many un-GC'd
// allocations a client has and is generally fulfilled by the Client.
type AllocCounter interface {
	// NumAllocs returns the number of allocations that have not been
	// garbage collected yet.
	NumAllocs() int
}
|
|
|
|
// AllocGarbageCollector garbage collects terminated allocations on a node
type AllocGarbageCollector struct {
	// config holds the thresholds, interval and parallelism for the GC
	config *GCConfig

	// allocRunners marked for GC
	allocRunners *IndexedGCAllocPQ

	// statsCollector for node based thresholds (eg disk)
	statsCollector stats.NodeStatsCollector

	// allocCounter returns the number of un-GC'd allocs on this node
	allocCounter AllocCounter

	// destroyCh is a semaphore for rate limiting concurrent garbage
	// collections
	destroyCh chan struct{}

	// shutdownCh is closed when the GC's run method should exit
	shutdownCh chan struct{}

	// triggerCh is ticked by the Trigger method to cause a GC
	triggerCh chan struct{}

	// logger is the "gc" named logger used for all GC messages
	logger hclog.Logger
}
|
|
|
|
// NewAllocGarbageCollector returns a garbage collector for terminated
|
|
// allocations on a node. Must call Run() in a goroutine enable periodic
|
|
// garbage collection.
|
|
func NewAllocGarbageCollector(logger hclog.Logger, statsCollector stats.NodeStatsCollector, ac AllocCounter, config *GCConfig) *AllocGarbageCollector {
|
|
logger = logger.Named("gc")
|
|
// Require at least 1 to make progress
|
|
if config.ParallelDestroys <= 0 {
|
|
logger.Warn("garbage collector defaulting parallelism to 1 due to invalid input value", "gc_parallel_destroys", config.ParallelDestroys)
|
|
config.ParallelDestroys = 1
|
|
}
|
|
|
|
gc := &AllocGarbageCollector{
|
|
allocRunners: NewIndexedGCAllocPQ(),
|
|
statsCollector: statsCollector,
|
|
allocCounter: ac,
|
|
config: config,
|
|
logger: logger,
|
|
destroyCh: make(chan struct{}, config.ParallelDestroys),
|
|
shutdownCh: make(chan struct{}),
|
|
triggerCh: make(chan struct{}, 1),
|
|
}
|
|
|
|
return gc
|
|
}
|
|
|
|
// Run the periodic garbage collector.
|
|
func (a *AllocGarbageCollector) Run() {
|
|
ticker := time.NewTicker(a.config.Interval)
|
|
for {
|
|
select {
|
|
case <-a.triggerCh:
|
|
case <-ticker.C:
|
|
case <-a.shutdownCh:
|
|
ticker.Stop()
|
|
return
|
|
}
|
|
|
|
if err := a.keepUsageBelowThreshold(); err != nil {
|
|
a.logger.Error("error garbage collecting allocations", "error", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Trigger forces the garbage collector to run.
|
|
func (a *AllocGarbageCollector) Trigger() {
|
|
select {
|
|
case a.triggerCh <- struct{}{}:
|
|
default:
|
|
// already triggered
|
|
}
|
|
}
|
|
|
|
// keepUsageBelowThreshold collects disk usage information and garbage collects
|
|
// allocations to make disk space available.
|
|
func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
|
|
for {
|
|
select {
|
|
case <-a.shutdownCh:
|
|
return nil
|
|
default:
|
|
}
|
|
|
|
// Check if we have enough free space
|
|
if err := a.statsCollector.Collect(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// See if we are below thresholds for used disk space and inode usage
|
|
diskStats := a.statsCollector.Stats().AllocDirStats
|
|
reason := ""
|
|
logf := a.logger.Warn
|
|
|
|
liveAllocs := a.allocCounter.NumAllocs()
|
|
|
|
switch {
|
|
case diskStats.UsedPercent > a.config.DiskUsageThreshold:
|
|
reason = fmt.Sprintf("disk usage of %.0f is over gc threshold of %.0f",
|
|
diskStats.UsedPercent, a.config.DiskUsageThreshold)
|
|
case diskStats.InodesUsedPercent > a.config.InodeUsageThreshold:
|
|
reason = fmt.Sprintf("inode usage of %.0f is over gc threshold of %.0f",
|
|
diskStats.InodesUsedPercent, a.config.InodeUsageThreshold)
|
|
case liveAllocs > a.config.MaxAllocs:
|
|
// if we're unable to gc, don't WARN until at least 2x over limit
|
|
if liveAllocs < (a.config.MaxAllocs * 2) {
|
|
logf = a.logger.Info
|
|
}
|
|
reason = fmt.Sprintf("number of allocations (%d) is over the limit (%d)", liveAllocs, a.config.MaxAllocs)
|
|
}
|
|
|
|
if reason == "" {
|
|
// No reason to gc, exit
|
|
break
|
|
}
|
|
|
|
// Collect an allocation
|
|
gcAlloc := a.allocRunners.Pop()
|
|
if gcAlloc == nil {
|
|
logf("garbage collection skipped because no terminal allocations", "reason", reason)
|
|
break
|
|
}
|
|
|
|
// Destroy the alloc runner and wait until it exits
|
|
a.destroyAllocRunner(gcAlloc.allocID, gcAlloc.allocRunner, reason)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// destroyAllocRunner is used to destroy an allocation runner. It will acquire a
|
|
// lock to restrict parallelism and then destroy the alloc runner, returning
|
|
// once the allocation has been destroyed.
|
|
func (a *AllocGarbageCollector) destroyAllocRunner(allocID string, ar interfaces.AllocRunner, reason string) {
|
|
a.logger.Info("garbage collecting allocation", "alloc_id", allocID, "reason", reason)
|
|
|
|
// Acquire the destroy lock
|
|
select {
|
|
case <-a.shutdownCh:
|
|
return
|
|
case a.destroyCh <- struct{}{}:
|
|
}
|
|
|
|
ar.Destroy()
|
|
|
|
select {
|
|
case <-ar.DestroyCh():
|
|
case <-a.shutdownCh:
|
|
}
|
|
|
|
a.logger.Debug("alloc garbage collected", "alloc_id", allocID)
|
|
|
|
// Release the lock
|
|
<-a.destroyCh
|
|
}
|
|
|
|
// Stop closes shutdownCh, which makes Run return and makes the collection
// loops and pending destroys that select on it bail out. It must be called at
// most once: closing an already closed channel panics.
func (a *AllocGarbageCollector) Stop() {
	close(a.shutdownCh)
}
|
|
|
|
// Collect garbage collects a single allocation on a node. Returns true if
|
|
// alloc was found and garbage collected; otherwise false.
|
|
func (a *AllocGarbageCollector) Collect(allocID string) bool {
|
|
gcAlloc := a.allocRunners.Remove(allocID)
|
|
if gcAlloc == nil {
|
|
a.logger.Debug("alloc was already garbage collected", "alloc_id", allocID)
|
|
return false
|
|
}
|
|
|
|
a.destroyAllocRunner(allocID, gcAlloc.allocRunner, "forced collection")
|
|
return true
|
|
}
|
|
|
|
// CollectAll garbage collects all terminated allocations on a node
|
|
func (a *AllocGarbageCollector) CollectAll() {
|
|
for {
|
|
select {
|
|
case <-a.shutdownCh:
|
|
return
|
|
default:
|
|
}
|
|
|
|
gcAlloc := a.allocRunners.Pop()
|
|
if gcAlloc == nil {
|
|
return
|
|
}
|
|
|
|
go a.destroyAllocRunner(gcAlloc.allocID, gcAlloc.allocRunner, "forced full node collection")
|
|
}
|
|
}
|
|
|
|
// MakeRoomFor garbage collects enough number of allocations in the terminal
|
|
// state to make room for new allocations
|
|
func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) error {
|
|
if len(allocations) == 0 {
|
|
// Nothing to make room for!
|
|
return nil
|
|
}
|
|
|
|
// GC allocs until below the max limit + the new allocations
|
|
max := a.config.MaxAllocs - len(allocations)
|
|
for a.allocCounter.NumAllocs() > max {
|
|
select {
|
|
case <-a.shutdownCh:
|
|
return nil
|
|
default:
|
|
}
|
|
|
|
gcAlloc := a.allocRunners.Pop()
|
|
if gcAlloc == nil {
|
|
// It's fine if we can't lower below the limit here as
|
|
// we'll keep trying to drop below the limit with each
|
|
// periodic gc
|
|
break
|
|
}
|
|
|
|
// Destroy the alloc runner and wait until it exits
|
|
a.destroyAllocRunner(gcAlloc.allocID, gcAlloc.allocRunner, fmt.Sprintf("new allocations and over max (%d)", a.config.MaxAllocs))
|
|
}
|
|
|
|
totalResource := &structs.AllocatedSharedResources{}
|
|
for _, alloc := range allocations {
|
|
// COMPAT(0.11): Remove in 0.11
|
|
if alloc.AllocatedResources != nil {
|
|
totalResource.Add(&alloc.AllocatedResources.Shared)
|
|
} else {
|
|
totalResource.DiskMB += int64(alloc.Resources.DiskMB)
|
|
}
|
|
}
|
|
|
|
// If the host has enough free space to accommodate the new allocations then
|
|
// we don't need to garbage collect terminated allocations
|
|
if hostStats := a.statsCollector.Stats(); hostStats != nil {
|
|
var availableForAllocations uint64
|
|
if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
|
|
availableForAllocations = 0
|
|
} else {
|
|
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
|
|
}
|
|
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
var diskCleared int64
|
|
for {
|
|
select {
|
|
case <-a.shutdownCh:
|
|
return nil
|
|
default:
|
|
}
|
|
|
|
// Collect host stats and see if we still need to remove older
|
|
// allocations
|
|
var allocDirStats *stats.DiskStats
|
|
if err := a.statsCollector.Collect(); err == nil {
|
|
if hostStats := a.statsCollector.Stats(); hostStats != nil {
|
|
allocDirStats = hostStats.AllocDirStats
|
|
}
|
|
}
|
|
|
|
if allocDirStats != nil {
|
|
if allocDirStats.Available >= uint64(totalResource.DiskMB*MB) {
|
|
break
|
|
}
|
|
} else {
|
|
// Falling back to a simpler model to know if we have enough disk
|
|
// space if stats collection fails
|
|
if diskCleared >= totalResource.DiskMB {
|
|
break
|
|
}
|
|
}
|
|
|
|
gcAlloc := a.allocRunners.Pop()
|
|
if gcAlloc == nil {
|
|
break
|
|
}
|
|
|
|
ar := gcAlloc.allocRunner
|
|
alloc := ar.Alloc()
|
|
|
|
// COMPAT(0.11): Remove in 0.11
|
|
var allocDiskMB int64
|
|
if alloc.AllocatedResources != nil {
|
|
allocDiskMB = alloc.AllocatedResources.Shared.DiskMB
|
|
} else {
|
|
allocDiskMB = int64(alloc.Resources.DiskMB)
|
|
}
|
|
|
|
// Destroy the alloc runner and wait until it exits
|
|
a.destroyAllocRunner(gcAlloc.allocID, ar, fmt.Sprintf("freeing %d MB for new allocations", allocDiskMB))
|
|
|
|
diskCleared += allocDiskMB
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarkForCollection starts tracking an allocation for Garbage Collection
|
|
func (a *AllocGarbageCollector) MarkForCollection(allocID string, ar interfaces.AllocRunner) {
|
|
if a.allocRunners.Push(allocID, ar) {
|
|
a.logger.Info("marking allocation for GC", "alloc_id", allocID)
|
|
}
|
|
}
|
|
|
|
// GCAlloc wraps an allocation runner and an index enabling it to be used within
// a PQ
type GCAlloc struct {
	// timeStamp is set when the entry is pushed onto the GC queue; the
	// queue orders entries by it, oldest first.
	timeStamp time.Time

	// allocID is the key the entry is stored under in the queue's index.
	allocID string

	// allocRunner is the runner that gets destroyed when the entry is
	// collected.
	allocRunner interfaces.AllocRunner

	// index is the entry's position in the heap slice, kept up to date by
	// the GCAllocPQImpl methods and set to -1 once the entry is popped.
	index int
}
|
|
|
|
// GCAllocPQImpl is the slice backing the GC priority queue. Its methods
// implement heap.Interface from container/heap.
type GCAllocPQImpl []*GCAlloc
|
|
|
|
// Len returns the number of entries in the queue.
func (pq GCAllocPQImpl) Len() int {
	return len(pq)
}
|
|
|
|
func (pq GCAllocPQImpl) Less(i, j int) bool {
|
|
return pq[i].timeStamp.Before(pq[j].timeStamp)
|
|
}
|
|
|
|
func (pq GCAllocPQImpl) Swap(i, j int) {
|
|
pq[i], pq[j] = pq[j], pq[i]
|
|
pq[i].index = i
|
|
pq[j].index = j
|
|
}
|
|
|
|
func (pq *GCAllocPQImpl) Push(x interface{}) {
|
|
n := len(*pq)
|
|
item := x.(*GCAlloc)
|
|
item.index = n
|
|
*pq = append(*pq, item)
|
|
}
|
|
|
|
func (pq *GCAllocPQImpl) Pop() interface{} {
|
|
old := *pq
|
|
n := len(old)
|
|
item := old[n-1]
|
|
item.index = -1 // for safety
|
|
*pq = old[0 : n-1]
|
|
return item
|
|
}
|
|
|
|
// IndexedGCAllocPQ is an indexed PQ which maintains a list of allocation
// runners ordered by the time they were pushed onto the queue.
type IndexedGCAllocPQ struct {
	// index maps an alloc ID to its queue entry
	index map[string]*GCAlloc

	// heap holds the same entries, ordered by timeStamp
	heap GCAllocPQImpl

	// pqLock is held by every method while it reads or modifies index and
	// heap
	pqLock sync.Mutex
}
|
|
|
|
func NewIndexedGCAllocPQ() *IndexedGCAllocPQ {
|
|
return &IndexedGCAllocPQ{
|
|
index: make(map[string]*GCAlloc),
|
|
heap: make(GCAllocPQImpl, 0),
|
|
}
|
|
}
|
|
|
|
// Push an alloc runner into the GC queue. Returns true if alloc was added,
|
|
// false if the alloc already existed.
|
|
func (i *IndexedGCAllocPQ) Push(allocID string, ar interfaces.AllocRunner) bool {
|
|
i.pqLock.Lock()
|
|
defer i.pqLock.Unlock()
|
|
|
|
if _, ok := i.index[allocID]; ok {
|
|
// No work to do
|
|
return false
|
|
}
|
|
gcAlloc := &GCAlloc{
|
|
timeStamp: time.Now(),
|
|
allocID: allocID,
|
|
allocRunner: ar,
|
|
}
|
|
i.index[allocID] = gcAlloc
|
|
heap.Push(&i.heap, gcAlloc)
|
|
return true
|
|
}
|
|
|
|
func (i *IndexedGCAllocPQ) Pop() *GCAlloc {
|
|
i.pqLock.Lock()
|
|
defer i.pqLock.Unlock()
|
|
|
|
if len(i.heap) == 0 {
|
|
return nil
|
|
}
|
|
|
|
gcAlloc := heap.Pop(&i.heap).(*GCAlloc)
|
|
delete(i.index, gcAlloc.allocRunner.Alloc().ID)
|
|
return gcAlloc
|
|
}
|
|
|
|
// Remove alloc from GC. Returns nil if alloc doesn't exist.
|
|
func (i *IndexedGCAllocPQ) Remove(allocID string) *GCAlloc {
|
|
i.pqLock.Lock()
|
|
defer i.pqLock.Unlock()
|
|
|
|
if gcAlloc, ok := i.index[allocID]; ok {
|
|
heap.Remove(&i.heap, gcAlloc.index)
|
|
delete(i.index, allocID)
|
|
return gcAlloc
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (i *IndexedGCAllocPQ) Length() int {
|
|
i.pqLock.Lock()
|
|
defer i.pqLock.Unlock()
|
|
|
|
return len(i.heap)
|
|
}
|