Making the GC related fields tunable

Diptanu Choudhury 2017-01-31 15:32:20 -08:00
parent b6c629a6b9
commit 11d7cb1230
8 changed files with 87 additions and 28 deletions


@@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg
return nil, fmt.Errorf("failed to initialize client: %v", err)
}
- // Add the stats collector and the garbage collector
+ // Add the stats collector
statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
c.hostStatsCollector = statsCollector
- c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB)
+ // Add the garbage collector
+ gcConfig := &GCConfig{
+ DiskUsageThreshold: cfg.GCDiskUsageThreshold,
+ InodeUsageThreshold: cfg.GCInodeUsageThreshold,
+ Interval: cfg.GCInterval,
+ ReservedDiskMB: cfg.Node.Reserved.DiskMB,
+ }
+ c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)
// Setup the node
if err := c.setupNode(); err != nil {
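The constructor now receives the full GCConfig instead of a bare reservedDiskMB value. A minimal sketch of calling the new signature directly, say from a test; the logger, the alloc dir path, and the field values here are illustrative stand-ins, not part of the commit:

logger := log.New(os.Stderr, "client: ", log.LstdFlags)
statsCollector := stats.NewHostStatsCollector(logger, "/tmp/nomad-alloc")
gc := NewAllocGarbageCollector(logger, statsCollector, &GCConfig{
	DiskUsageThreshold:  90,              // GC once the alloc dir is 90% full
	InodeUsageThreshold: 85,              // ...or 85% of its inodes are used
	Interval:            5 * time.Minute, // how often the run loop wakes up
	ReservedDiskMB:      1024,            // disk kept back from allocations
})
_ = gc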


@@ -151,6 +151,18 @@ type Config struct {
// TLSConfig holds various TLS related configurations
TLSConfig *config.TLSConfig
+ // GCInterval is the time interval at which the client triggers garbage
+ // collection
+ GCInterval time.Duration
+ // GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
+ // client triggers GC of terminal allocations
+ GCDiskUsageThreshold float64
+ // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
+ // client triggers GC of terminal allocations
+ GCInodeUsageThreshold float64
// LogLevel is the level of the logs to put out
LogLevel string
}
@@ -177,6 +189,9 @@ func DefaultConfig() *Config {
StatsCollectionInterval: 1 * time.Second,
TLSConfig: &config.TLSConfig{},
LogLevel: "DEBUG",
+ GCInterval: 1 * time.Minute,
+ GCDiskUsageThreshold: 80,
+ GCInodeUsageThreshold: 70,
}
}
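Because the client-level DefaultConfig now seeds the GC fields, embedders of the client package can start from the defaults and override only the knobs they care about. A small sketch; the override values are illustrative:

cfg := config.DefaultConfig()     // GCInterval 1m, 80% disk, 70% inode thresholds
cfg.GCInterval = 30 * time.Second // collect more aggressively
cfg.GCDiskUsageThreshold = 90     // but tolerate fuller disks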


@@ -12,18 +12,6 @@ import (
)
const (
- // diskUsageThreshold is the percent of used disk space beyond which Nomad
- // GCs terminated allocations
- diskUsageThreshold = 80
- // gcInterval is the interval at which Nomad runs the garbage collector
- gcInterval = 1 * time.Minute
- // inodeUsageThreshold is the percent of inode usage that Nomad tries to
- // maintain, whenever we are over it we will attempt to GC terminal
- // allocations
- inodeUsageThreshold = 70
// MB is a constant which converts values in bytes to MB
MB = 1024 * 1024
)
@@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int {
return len(i.heap)
}
+ // GCConfig allows changing the behaviour of the garbage collector
+ type GCConfig struct {
+ DiskUsageThreshold float64
+ InodeUsageThreshold float64
+ Interval time.Duration
+ ReservedDiskMB int
+ }
// AllocGarbageCollector garbage collects terminated allocations on a node
type AllocGarbageCollector struct {
allocRunners *IndexedGCAllocPQ
statsCollector stats.NodeStatsCollector
- reservedDiskMB int
+ config *GCConfig
logger *log.Logger
shutdownCh chan struct{}
}
// NewAllocGarbageCollector returns a garbage collector for terminated
// allocations on a node.
- func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector {
+ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
gc := &AllocGarbageCollector{
allocRunners: NewIndexedGCAllocPQ(),
statsCollector: statsCollector,
- reservedDiskMB: reservedDiskMB,
+ config: config,
logger: logger,
shutdownCh: make(chan struct{}),
}
@@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
}
func (a *AllocGarbageCollector) run() {
- ticker := time.NewTicker(gcInterval)
+ ticker := time.NewTicker(a.config.Interval)
for {
select {
case <-ticker.C:
@@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
break
}
- if diskStats.UsedPercent <= diskUsageThreshold &&
- diskStats.InodesUsedPercent <= inodeUsageThreshold {
+ if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
+ diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
break
}
@@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
// we don't need to garbage collect terminated allocations
if hostStats := a.statsCollector.Stats(); hostStats != nil {
var availableForAllocations uint64
- if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) {
+ if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
availableForAllocations = 0
} else {
- availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB)
+ availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
}
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
return nil
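Taken together, the gc.go changes swap the three package-level constants for per-collector configuration. A condensed sketch of how keepUsageBelowThreshold consults the new fields; error handling and the step that actually destroys an allocation are elided:

for {
	hostStats := a.statsCollector.Stats()
	if hostStats == nil || hostStats.AllocDirStats == nil {
		break
	}
	diskStats := hostStats.AllocDirStats
	// Stop once usage is back under both configurable thresholds.
	if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
		diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
		break
	}
	// Otherwise pop the next terminal allocation off the heap, destroy
	// it, and loop to re-check usage.
}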


@@ -288,6 +288,11 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
conf.TLSConfig = a.config.TLSConfig
conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
+ // Set the GC related configs
+ conf.GCInterval = a.config.Client.GCInterval
+ conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
+ conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
return conf, nil
}
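One subtlety in this plumbing: the collector's run loop hands GCInterval straight to time.NewTicker, which panics on a non-positive duration, so the agent relies on the defaults added below to keep the value sane. A hypothetical defensive variant, not part of the commit, could guard explicitly:

if a.config.Client.GCInterval <= 0 {
	conf.GCInterval = 1 * time.Minute // fall back instead of panicking in the ticker
} else {
	conf.GCInterval = a.config.Client.GCInterval
}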


@@ -53,6 +53,9 @@ client {
data_points = 35
collection_interval = "5s"
}
gc_interval = "6s"
gc_disk_usage_threshold = 82
gc_inode_usage_threshold = 91
}
server {
enabled = true
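Note that gc_interval is a quoted string while the thresholds are bare numbers: durations travel through HCL as strings in Go's time.ParseDuration syntax and are converted by the decode hook added later in this commit. Any literal that time.ParseDuration accepts would work here, as this snippet illustrates:

d, err := time.ParseDuration("6s")
if err != nil {
	log.Fatal(err) // "6s" parses fine; shown only for completeness
}
fmt.Println(d) // 6s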


@@ -197,6 +197,18 @@ type ClientConfig struct {
// be used to target a certain utilization or to prevent Nomad from using a
// particular set of ports.
Reserved *Resources `mapstructure:"reserved"`
+ // GCInterval is the time interval at which the client triggers garbage
+ // collection
+ GCInterval time.Duration `mapstructure:"gc_interval"`
+ // GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
+ // client triggers GC of terminal allocations
+ GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"`
+ // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
+ // client triggers GC of terminal allocations
+ GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"`
}
// ServerConfig is configuration specific to the server mode
@@ -465,6 +477,9 @@ func DevConfig() *Config {
conf.Client.Options = map[string]string{
"driver.docker.volumes": "true",
}
+ conf.Client.GCInterval = 10 * time.Minute
+ conf.Client.GCDiskUsageThreshold = 99
+ conf.Client.GCInodeUsageThreshold = 99
return conf
}
@@ -487,11 +502,14 @@ func DefaultConfig() *Config {
Consul: config.DefaultConsulConfig(),
Vault: config.DefaultVaultConfig(),
Client: &ClientConfig{
Enabled: false,
MaxKillTimeout: "30s",
ClientMinPort: 14000,
ClientMaxPort: 14512,
Reserved: &Resources{},
+ GCInterval: 1 * time.Minute,
+ GCInodeUsageThreshold: 70,
+ GCDiskUsageThreshold: 80,
},
Server: &ServerConfig{
Enabled: false,


@@ -341,6 +341,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
"client_min_port",
"reserved",
"stats",
"gc_interval",
"gc_disk_usage_threshold",
"gc_inode_usage_threshold",
}
if err := checkHCLKeys(listVal, valid); err != nil {
return err
@@ -358,7 +361,15 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
delete(m, "stats")
var config ClientConfig
- if err := mapstructure.WeakDecode(m, &config); err != nil {
+ dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
+ DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
+ WeaklyTypedInput: true,
+ Result: &config,
+ })
+ if err != nil {
+ return err
+ }
+ if err := dec.Decode(m); err != nil {
return err
}
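mapstructure.WeakDecode on its own cannot turn the string "6s" into a time.Duration, which is why the parser now builds an explicit decoder with StringToTimeDurationHookFunc while keeping WeaklyTypedInput for the old coercions. A self-contained sketch of the same pattern; the struct and input values are illustrative, not Nomad's:

package main

import (
	"fmt"
	"time"

	"github.com/mitchellh/mapstructure"
)

// gcSettings mirrors the shape of the new client fields.
type gcSettings struct {
	GCInterval           time.Duration `mapstructure:"gc_interval"`
	GCDiskUsageThreshold float64       `mapstructure:"gc_disk_usage_threshold"`
}

func main() {
	// HCL hands durations over as plain strings.
	m := map[string]interface{}{
		"gc_interval":             "6s",
		"gc_disk_usage_threshold": 82,
	}
	var out gcSettings
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		DecodeHook:       mapstructure.StringToTimeDurationHookFunc(),
		WeaklyTypedInput: true,
		Result:           &out,
	})
	if err != nil {
		panic(err)
	}
	if err := dec.Decode(m); err != nil {
		panic(err)
	}
	fmt.Println(out.GCInterval, out.GCDiskUsageThreshold) // 6s 82
}

WeakDecode is simply a decoder with WeaklyTypedInput enabled, so the only behavioural change here is the added duration hook.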


@@ -70,6 +70,9 @@ func TestConfig_Parse(t *testing.T) {
ReservedPorts: "1,100,10-12",
ParsedReservedPorts: []int{1, 10, 11, 12, 100},
},
+ GCInterval: 6 * time.Second,
+ GCDiskUsageThreshold: 82,
+ GCInodeUsageThreshold: 91,
},
Server: &ServerConfig{
Enabled: true,