diff --git a/command/agent/agent.go b/command/agent/agent.go index 54a695600..2a5780234 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -275,6 +275,13 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { } conf.NodeGCThreshold = dur } + if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { + dur, err := time.ParseDuration(gcInterval) + if err != nil { + return nil, err + } + conf.JobGCInterval = dur + } if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { dur, err := time.ParseDuration(gcThreshold) if err != nil { diff --git a/command/agent/config.go b/command/agent/config.go index 8153fe8aa..f06e368d9 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -315,6 +315,10 @@ type ServerConfig struct { // can be used to filter by age. NodeGCThreshold string `hcl:"node_gc_threshold"` + // JobGCInterval controls how often we dispatch a job to GC jobs that are + // available for garbage collection. + JobGCInterval string `hcl:"job_gc_interval"` + // JobGCThreshold controls how "old" a job must be to be collected by GC. // Age is not the only requirement for a Job to be GCed but the threshold // can be used to filter by age. 
@@ -1133,6 +1137,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.NodeGCThreshold != "" { result.NodeGCThreshold = b.NodeGCThreshold } + if b.JobGCInterval != "" { + result.JobGCInterval = b.JobGCInterval + } if b.JobGCThreshold != "" { result.JobGCThreshold = b.JobGCThreshold } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index c54b7873d..c8593ce59 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -93,6 +93,7 @@ var basicConfig = &Config{ EnabledSchedulers: []string{"test"}, NodeGCThreshold: "12h", EvalGCThreshold: "12h", + JobGCInterval: "3m", JobGCThreshold: "12h", DeploymentGCThreshold: "12h", HeartbeatGrace: 30 * time.Second, diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index 037820591..97b4cd99c 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -101,6 +101,7 @@ server { num_schedulers = 2 enabled_schedulers = ["test"] node_gc_threshold = "12h" + job_gc_interval = "3m" job_gc_threshold = "12h" eval_gc_threshold = "12h" deployment_gc_threshold = "12h" diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index cf0c4645a..11af6beb4 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -208,6 +208,7 @@ "encrypt": "abc", "eval_gc_threshold": "12h", "heartbeat_grace": "30s", + "job_gc_interval": "3m", "job_gc_threshold": "12h", "max_heartbeats_per_second": 11, "min_heartbeat_ttl": "33s", diff --git a/website/source/docs/configuration/server.html.md b/website/source/docs/configuration/server.html.md index 35de23397..dbd37594a 100644 --- a/website/source/docs/configuration/server.html.md +++ b/website/source/docs/configuration/server.html.md @@ -77,6 +77,14 @@ server { terminal state before it is garbage collected and purged from the system. This is specified using a label suffix like "30s" or "1h". 
+- `job_gc_interval` `(string: "5m")` - Specifies the interval between job + garbage collections. Only jobs that have been terminal for at least + `job_gc_threshold` will be collected. Lowering the interval will perform more + frequent but smaller collections. Raising the interval will perform collections + less frequently but collect more jobs at a time. Reducing this interval is + useful if there is a large throughput of tasks, leading to a large set of + dead jobs. This is specified using a label suffix like "30s" or "3m". + - `job_gc_threshold` `(string: "4h")` - Specifies the minimum time a job must be in the terminal state before it is eligible for garbage collection. This is specified using a label suffix like "30s" or "1h".