Merge pull request #2261 from hashicorp/f-gc-threshold
Making GC related fields tunable
This commit is contained in:
commit
d463bb8e5a
|
@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg
|
||||||
return nil, fmt.Errorf("failed to initialize client: %v", err)
|
return nil, fmt.Errorf("failed to initialize client: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the stats collector and the garbage collector
|
// Add the stats collector
|
||||||
statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
|
statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
|
||||||
c.hostStatsCollector = statsCollector
|
c.hostStatsCollector = statsCollector
|
||||||
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB)
|
|
||||||
|
// Add the garbage collector
|
||||||
|
gcConfig := &GCConfig{
|
||||||
|
DiskUsageThreshold: cfg.GCDiskUsageThreshold,
|
||||||
|
InodeUsageThreshold: cfg.GCInodeUsageThreshold,
|
||||||
|
Interval: cfg.GCInterval,
|
||||||
|
ReservedDiskMB: cfg.Node.Reserved.DiskMB,
|
||||||
|
}
|
||||||
|
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)
|
||||||
|
|
||||||
// Setup the node
|
// Setup the node
|
||||||
if err := c.setupNode(); err != nil {
|
if err := c.setupNode(); err != nil {
|
||||||
|
|
|
@ -151,6 +151,18 @@ type Config struct {
|
||||||
// TLSConfig holds various TLS related configurations
|
// TLSConfig holds various TLS related configurations
|
||||||
TLSConfig *config.TLSConfig
|
TLSConfig *config.TLSConfig
|
||||||
|
|
||||||
|
// GCInterval is the time interval at which the client triggers garbage
|
||||||
|
// collection
|
||||||
|
GCInterval time.Duration
|
||||||
|
|
||||||
|
// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
|
||||||
|
// client triggers GC of terminal allocations
|
||||||
|
GCDiskUsageThreshold float64
|
||||||
|
|
||||||
|
// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
|
||||||
|
// client triggers GC of the terminal allocations
|
||||||
|
GCInodeUsageThreshold float64
|
||||||
|
|
||||||
// LogLevel is the level of the logs to output
|
// LogLevel is the level of the logs to output
|
||||||
LogLevel string
|
LogLevel string
|
||||||
}
|
}
|
||||||
|
@ -177,6 +189,9 @@ func DefaultConfig() *Config {
|
||||||
StatsCollectionInterval: 1 * time.Second,
|
StatsCollectionInterval: 1 * time.Second,
|
||||||
TLSConfig: &config.TLSConfig{},
|
TLSConfig: &config.TLSConfig{},
|
||||||
LogLevel: "DEBUG",
|
LogLevel: "DEBUG",
|
||||||
|
GCInterval: 1 * time.Minute,
|
||||||
|
GCDiskUsageThreshold: 80,
|
||||||
|
GCInodeUsageThreshold: 70,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
36
client/gc.go
36
client/gc.go
|
@ -12,18 +12,6 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// diskUsageThreshold is the percent of used disk space beyond which Nomad
|
|
||||||
// GCs terminated allocations
|
|
||||||
diskUsageThreshold = 80
|
|
||||||
|
|
||||||
// gcInterval is the interval at which Nomad runs the garbage collector
|
|
||||||
gcInterval = 1 * time.Minute
|
|
||||||
|
|
||||||
// inodeUsageThreshold is the percent of inode usage that Nomad tries to
|
|
||||||
// maintain, whenever we are over it we will attempt to GC terminal
|
|
||||||
// allocations
|
|
||||||
inodeUsageThreshold = 70
|
|
||||||
|
|
||||||
// MB is a constant which converts values in bytes to MB
|
// MB is a constant which converts values in bytes to MB
|
||||||
MB = 1024 * 1024
|
MB = 1024 * 1024
|
||||||
)
|
)
|
||||||
|
@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int {
|
||||||
return len(i.heap)
|
return len(i.heap)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GCConfig allows changing the behaviour of the garbage collector
|
||||||
|
type GCConfig struct {
|
||||||
|
DiskUsageThreshold float64
|
||||||
|
InodeUsageThreshold float64
|
||||||
|
Interval time.Duration
|
||||||
|
ReservedDiskMB int
|
||||||
|
}
|
||||||
|
|
||||||
// AllocGarbageCollector garbage collects terminated allocations on a node
|
// AllocGarbageCollector garbage collects terminated allocations on a node
|
||||||
type AllocGarbageCollector struct {
|
type AllocGarbageCollector struct {
|
||||||
allocRunners *IndexedGCAllocPQ
|
allocRunners *IndexedGCAllocPQ
|
||||||
statsCollector stats.NodeStatsCollector
|
statsCollector stats.NodeStatsCollector
|
||||||
reservedDiskMB int
|
config *GCConfig
|
||||||
logger *log.Logger
|
logger *log.Logger
|
||||||
shutdownCh chan struct{}
|
shutdownCh chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAllocGarbageCollector returns a garbage collector for terminated
|
// NewAllocGarbageCollector returns a garbage collector for terminated
|
||||||
// allocations on a node.
|
// allocations on a node.
|
||||||
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector {
|
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
|
||||||
gc := &AllocGarbageCollector{
|
gc := &AllocGarbageCollector{
|
||||||
allocRunners: NewIndexedGCAllocPQ(),
|
allocRunners: NewIndexedGCAllocPQ(),
|
||||||
statsCollector: statsCollector,
|
statsCollector: statsCollector,
|
||||||
reservedDiskMB: reservedDiskMB,
|
config: config,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
shutdownCh: make(chan struct{}),
|
shutdownCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *AllocGarbageCollector) run() {
|
func (a *AllocGarbageCollector) run() {
|
||||||
ticker := time.NewTicker(gcInterval)
|
ticker := time.NewTicker(a.config.Interval)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
|
@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if diskStats.UsedPercent <= diskUsageThreshold &&
|
if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
|
||||||
diskStats.InodesUsedPercent <= inodeUsageThreshold {
|
diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
|
||||||
// we don't need to garbage collect terminated allocations
|
// we don't need to garbage collect terminated allocations
|
||||||
if hostStats := a.statsCollector.Stats(); hostStats != nil {
|
if hostStats := a.statsCollector.Stats(); hostStats != nil {
|
||||||
var availableForAllocations uint64
|
var availableForAllocations uint64
|
||||||
if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) {
|
if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
|
||||||
availableForAllocations = 0
|
availableForAllocations = 0
|
||||||
} else {
|
} else {
|
||||||
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB)
|
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
|
||||||
}
|
}
|
||||||
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
|
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -288,6 +288,11 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
|
||||||
conf.TLSConfig = a.config.TLSConfig
|
conf.TLSConfig = a.config.TLSConfig
|
||||||
conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
|
conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
|
||||||
|
|
||||||
|
// Set the GC related configs
|
||||||
|
conf.GCInterval = a.config.Client.GCInterval
|
||||||
|
conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
|
||||||
|
conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
|
||||||
|
|
||||||
return conf, nil
|
return conf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,9 @@ client {
|
||||||
data_points = 35
|
data_points = 35
|
||||||
collection_interval = "5s"
|
collection_interval = "5s"
|
||||||
}
|
}
|
||||||
|
gc_interval = "6s"
|
||||||
|
gc_disk_usage_threshold = 82
|
||||||
|
gc_inode_usage_threshold = 91
|
||||||
}
|
}
|
||||||
server {
|
server {
|
||||||
enabled = true
|
enabled = true
|
||||||
|
|
|
@ -197,6 +197,18 @@ type ClientConfig struct {
|
||||||
// be used to target a certain utilization or to prevent Nomad from using a
|
// be used to target a certain utilization or to prevent Nomad from using a
|
||||||
// particular set of ports.
|
// particular set of ports.
|
||||||
Reserved *Resources `mapstructure:"reserved"`
|
Reserved *Resources `mapstructure:"reserved"`
|
||||||
|
|
||||||
|
// GCInterval is the time interval at which the client triggers garbage
|
||||||
|
// collection
|
||||||
|
GCInterval time.Duration `mapstructure:"gc_interval"`
|
||||||
|
|
||||||
|
// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
|
||||||
|
// client triggers GC of the terminal allocations
|
||||||
|
GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"`
|
||||||
|
|
||||||
|
// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
|
||||||
|
// client triggers GC of the terminal allocations
|
||||||
|
GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ServerConfig is configuration specific to the server mode
|
// ServerConfig is configuration specific to the server mode
|
||||||
|
@ -465,6 +477,9 @@ func DevConfig() *Config {
|
||||||
conf.Client.Options = map[string]string{
|
conf.Client.Options = map[string]string{
|
||||||
"driver.docker.volumes": "true",
|
"driver.docker.volumes": "true",
|
||||||
}
|
}
|
||||||
|
conf.Client.GCInterval = 10 * time.Minute
|
||||||
|
conf.Client.GCDiskUsageThreshold = 99
|
||||||
|
conf.Client.GCInodeUsageThreshold = 99
|
||||||
|
|
||||||
return conf
|
return conf
|
||||||
}
|
}
|
||||||
|
@ -487,11 +502,14 @@ func DefaultConfig() *Config {
|
||||||
Consul: config.DefaultConsulConfig(),
|
Consul: config.DefaultConsulConfig(),
|
||||||
Vault: config.DefaultVaultConfig(),
|
Vault: config.DefaultVaultConfig(),
|
||||||
Client: &ClientConfig{
|
Client: &ClientConfig{
|
||||||
Enabled: false,
|
Enabled: false,
|
||||||
MaxKillTimeout: "30s",
|
MaxKillTimeout: "30s",
|
||||||
ClientMinPort: 14000,
|
ClientMinPort: 14000,
|
||||||
ClientMaxPort: 14512,
|
ClientMaxPort: 14512,
|
||||||
Reserved: &Resources{},
|
Reserved: &Resources{},
|
||||||
|
GCInterval: 1 * time.Minute,
|
||||||
|
GCInodeUsageThreshold: 70,
|
||||||
|
GCDiskUsageThreshold: 80,
|
||||||
},
|
},
|
||||||
Server: &ServerConfig{
|
Server: &ServerConfig{
|
||||||
Enabled: false,
|
Enabled: false,
|
||||||
|
|
|
@ -341,6 +341,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
|
||||||
"client_min_port",
|
"client_min_port",
|
||||||
"reserved",
|
"reserved",
|
||||||
"stats",
|
"stats",
|
||||||
|
"gc_interval",
|
||||||
|
"gc_disk_usage_threshold",
|
||||||
|
"gc_inode_usage_threshold",
|
||||||
}
|
}
|
||||||
if err := checkHCLKeys(listVal, valid); err != nil {
|
if err := checkHCLKeys(listVal, valid); err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -358,7 +361,15 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
|
||||||
delete(m, "stats")
|
delete(m, "stats")
|
||||||
|
|
||||||
var config ClientConfig
|
var config ClientConfig
|
||||||
if err := mapstructure.WeakDecode(m, &config); err != nil {
|
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
|
||||||
|
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
|
||||||
|
WeaklyTypedInput: true,
|
||||||
|
Result: &config,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := dec.Decode(m); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -70,6 +70,9 @@ func TestConfig_Parse(t *testing.T) {
|
||||||
ReservedPorts: "1,100,10-12",
|
ReservedPorts: "1,100,10-12",
|
||||||
ParsedReservedPorts: []int{1, 10, 11, 12, 100},
|
ParsedReservedPorts: []int{1, 10, 11, 12, 100},
|
||||||
},
|
},
|
||||||
|
GCInterval: 6 * time.Second,
|
||||||
|
GCDiskUsageThreshold: 82,
|
||||||
|
GCInodeUsageThreshold: 91,
|
||||||
},
|
},
|
||||||
Server: &ServerConfig{
|
Server: &ServerConfig{
|
||||||
Enabled: true,
|
Enabled: true,
|
||||||
|
|
|
@ -84,3 +84,4 @@ done
|
||||||
echo
|
echo
|
||||||
echo "==> Results:"
|
echo "==> Results:"
|
||||||
tree pkg/
|
tree pkg/
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,15 @@ client {
|
||||||
[data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with
|
[data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with
|
||||||
"client", like `"/opt/nomad/client"`. This must be an absolute path.
|
"client", like `"/opt/nomad/client"`. This must be an absolute path.
|
||||||
|
|
||||||
|
- `gc_interval` `(string: "1m")` - Specifies the interval at which Nomad
|
||||||
|
attempts to garbage collect terminal allocation directories.
|
||||||
|
|
||||||
|
- `gc_disk_usage_threshold` `(float: 80)` - Specifies the disk usage percent which
|
||||||
|
Nomad tries to maintain by garbage collecting terminal allocations.
|
||||||
|
|
||||||
|
- `gc_inode_usage_threshold` `(float: 70)` - Specifies the inode usage percent
|
||||||
|
which Nomad tries to maintain by garbage collecting terminal allocations.
|
||||||
|
|
||||||
### `chroot_env` Parameters
|
### `chroot_env` Parameters
|
||||||
|
|
||||||
Drivers based on [isolated fork/exec](/docs/drivers/exec.html) implement file
|
Drivers based on [isolated fork/exec](/docs/drivers/exec.html) implement file
|
||||||
|
|
Loading…
Reference in New Issue