Merge pull request #2261 from hashicorp/f-gc-threshold

Making GC-related fields tunable
Diptanu Choudhury 2017-01-31 16:44:53 -08:00 committed by GitHub
commit d463bb8e5a
10 changed files with 97 additions and 28 deletions


@@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg
		return nil, fmt.Errorf("failed to initialize client: %v", err)
	}
-	// Add the stats collector and the garbage collector
+	// Add the stats collector
	statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
	c.hostStatsCollector = statsCollector
-	c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB)
+
+	// Add the garbage collector
+	gcConfig := &GCConfig{
+		DiskUsageThreshold: cfg.GCDiskUsageThreshold,
+		InodeUsageThreshold: cfg.GCInodeUsageThreshold,
+		Interval: cfg.GCInterval,
+		ReservedDiskMB: cfg.Node.Reserved.DiskMB,
+	}
+	c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)
	// Setup the node
	if err := c.setupNode(); err != nil {


@@ -151,6 +151,18 @@ type Config struct {
	// TLSConfig holds various TLS related configurations
	TLSConfig *config.TLSConfig
+
+	// GCInterval is the time interval at which the client triggers garbage
+	// collection
+	GCInterval time.Duration
+
+	// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
+	// client triggers GC of terminal allocations
+	GCDiskUsageThreshold float64
+
+	// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
+	// client triggers GC of the terminal allocations
+	GCInodeUsageThreshold float64
	// LogLevel is the level of the logs to putout
	LogLevel string
}

@@ -177,6 +189,9 @@ func DefaultConfig() *Config {
		StatsCollectionInterval: 1 * time.Second,
		TLSConfig: &config.TLSConfig{},
		LogLevel: "DEBUG",
+		GCInterval: 1 * time.Minute,
+		GCDiskUsageThreshold: 80,
+		GCInodeUsageThreshold: 70,
	}
}


@@ -12,18 +12,6 @@ import (
)

const (
-	// diskUsageThreshold is the percent of used disk space beyond which Nomad
-	// GCs terminated allocations
-	diskUsageThreshold = 80
-
-	// gcInterval is the interval at which Nomad runs the garbage collector
-	gcInterval = 1 * time.Minute
-
-	// inodeUsageThreshold is the percent of inode usage that Nomad tries to
-	// maintain, whenever we are over it we will attempt to GC terminal
-	// allocations
-	inodeUsageThreshold = 70
-
	// MB is a constant which converts values in bytes to MB
	MB = 1024 * 1024
)

@@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int {
	return len(i.heap)
}

+// GCConfig allows changing the behaviour of the garbage collector
+type GCConfig struct {
+	DiskUsageThreshold float64
+	InodeUsageThreshold float64
+	Interval time.Duration
+	ReservedDiskMB int
+}
+
// AllocGarbageCollector garbage collects terminated allocations on a node
type AllocGarbageCollector struct {
	allocRunners *IndexedGCAllocPQ
	statsCollector stats.NodeStatsCollector
-	reservedDiskMB int
+	config *GCConfig
	logger *log.Logger
	shutdownCh chan struct{}
}

// NewAllocGarbageCollector returns a garbage collector for terminated
// allocations on a node.
-func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector {
+func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
	gc := &AllocGarbageCollector{
		allocRunners: NewIndexedGCAllocPQ(),
		statsCollector: statsCollector,
-		reservedDiskMB: reservedDiskMB,
+		config: config,
		logger: logger,
		shutdownCh: make(chan struct{}),
	}

@@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
}

func (a *AllocGarbageCollector) run() {
-	ticker := time.NewTicker(gcInterval)
+	ticker := time.NewTicker(a.config.Interval)
	for {
		select {
		case <-ticker.C:

@@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
			break
		}
-		if diskStats.UsedPercent <= diskUsageThreshold &&
-			diskStats.InodesUsedPercent <= inodeUsageThreshold {
+		if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
+			diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
			break
		}

@@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
	// we don't need to garbage collect terminated allocations
	if hostStats := a.statsCollector.Stats(); hostStats != nil {
		var availableForAllocations uint64
-		if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) {
+		if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
			availableForAllocations = 0
		} else {
-			availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB)
+			availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
		}
		if uint64(totalResource.DiskMB*MB) < availableForAllocations {
			return nil


@@ -288,6 +288,11 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
	conf.TLSConfig = a.config.TLSConfig
	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

+	// Set the GC related configs
+	conf.GCInterval = a.config.Client.GCInterval
+	conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
+	conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
+
	return conf, nil
}


@@ -53,6 +53,9 @@ client {
		data_points = 35
		collection_interval = "5s"
	}
+	gc_interval = "6s"
+	gc_disk_usage_threshold = 82
+	gc_inode_usage_threshold = 91
}

server {
	enabled = true


@@ -197,6 +197,18 @@ type ClientConfig struct {
	// be used to target a certain utilization or to prevent Nomad from using a
	// particular set of ports.
	Reserved *Resources `mapstructure:"reserved"`
+
+	// GCInterval is the time interval at which the client triggers garbage
+	// collection
+	GCInterval time.Duration `mapstructure:"gc_interval"`
+
+	// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
+	// client triggers GC of terminal allocations
+	GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"`
+
+	// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
+	// client triggers GC of the terminal allocations
+	GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"`
}

// ServerConfig is configuration specific to the server mode

@@ -465,6 +477,9 @@ func DevConfig() *Config {
	conf.Client.Options = map[string]string{
		"driver.docker.volumes": "true",
	}
+	conf.Client.GCInterval = 10 * time.Minute
+	conf.Client.GCDiskUsageThreshold = 99
+	conf.Client.GCInodeUsageThreshold = 99
	return conf
}

@@ -492,6 +507,9 @@ func DefaultConfig() *Config {
		ClientMinPort: 14000,
		ClientMaxPort: 14512,
		Reserved: &Resources{},
+		GCInterval: 1 * time.Minute,
+		GCInodeUsageThreshold: 70,
+		GCDiskUsageThreshold: 80,
	},
	Server: &ServerConfig{
		Enabled: false,


@@ -341,6 +341,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
		"client_min_port",
		"reserved",
		"stats",
+		"gc_interval",
+		"gc_disk_usage_threshold",
+		"gc_inode_usage_threshold",
	}
	if err := checkHCLKeys(listVal, valid); err != nil {
		return err

@@ -358,7 +361,15 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
	delete(m, "stats")

	var config ClientConfig
-	if err := mapstructure.WeakDecode(m, &config); err != nil {
+	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
+		DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
+		WeaklyTypedInput: true,
+		Result: &config,
+	})
+	if err != nil {
+		return err
+	}
+
+	if err := dec.Decode(m); err != nil {
		return err
	}
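The switch above from a plain mapstructure.WeakDecode call to an explicit decoder is what lets a duration string such as "6s" from the HCL land in the time.Duration field, while the numeric thresholds still decode weakly. A minimal standalone sketch of that decode path follows; the gcSettings struct and raw map are illustrative stand-ins, not the real ClientConfig:

package main

import (
	"fmt"
	"time"

	"github.com/mitchellh/mapstructure"
)

// gcSettings mirrors only the three GC fields added to ClientConfig;
// it is an illustrative stand-in, not the real struct.
type gcSettings struct {
	GCInterval time.Duration `mapstructure:"gc_interval"`
	GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"`
	GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"`
}

func main() {
	// Roughly what the HCL parser hands over: durations arrive as strings.
	raw := map[string]interface{}{
		"gc_interval": "6s",
		"gc_disk_usage_threshold": 82,
		"gc_inode_usage_threshold": 91,
	}

	var settings gcSettings
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		// The hook turns "6s" into 6 * time.Second before assignment.
		DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
		WeaklyTypedInput: true,
		Result: &settings,
	})
	if err != nil {
		panic(err)
	}
	if err := dec.Decode(raw); err != nil {
		panic(err)
	}

	fmt.Println(settings.GCInterval, settings.GCDiskUsageThreshold, settings.GCInodeUsageThreshold)
	// Output: 6s 82 91
}

A plain WeakDecode would have rejected "6s" for the time.Duration field, which is why the parser gains the duration hook rather than keeping the one-line decode.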


@@ -70,6 +70,9 @@ func TestConfig_Parse(t *testing.T) {
			ReservedPorts: "1,100,10-12",
			ParsedReservedPorts: []int{1, 10, 11, 12, 100},
		},
+		GCInterval: 6 * time.Second,
+		GCDiskUsageThreshold: 82,
+		GCInodeUsageThreshold: 91,
	},
	Server: &ServerConfig{
		Enabled: true,


@@ -84,3 +84,4 @@ done
echo
echo "==> Results:"
tree pkg/


@@ -83,6 +83,15 @@ client {
  [data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with
  "client", like `"/opt/nomad/client"`. This must be an absolute path.

+- `gc_interval` `(string: "1m")` - Specifies the interval at which Nomad
+  attempts to garbage collect terminal allocation directories.
+
+- `gc_disk_usage_threshold` `(float: 80)` - Specifies the disk usage percent which
+  Nomad tries to maintain by garbage collecting terminal allocations.
+
+- `gc_inode_usage_threshold` `(float: 70)` - Specifies the inode usage percent
+  which Nomad tries to maintain by garbage collecting terminal allocations.
+
### `chroot_env` Parameters

Drivers based on [isolated fork/exec](/docs/drivers/exec.html) implement file
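For reference, the three parameters documented above all live in the agent's client stanza; a minimal illustrative snippet (the values are arbitrary examples, not recommended defaults):

client {
  enabled                  = true
  gc_interval              = "10m"
  gc_disk_usage_threshold  = 85
  gc_inode_usage_threshold = 75
}

gc_interval accepts a Go-style duration string, while the two thresholds are usage percentages on the filesystem backing the allocation directory that the client tries to stay under by collecting terminal allocations.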