diff --git a/command/agent/agent.go b/command/agent/agent.go index 47c29326b..27c0f07f3 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -184,6 +184,13 @@ func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Confi } conf.EvalGCThreshold = dur } + if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { + dur, err := time.ParseDuration(gcThreshold) + if err != nil { + return nil, err + } + conf.DeploymentGCThreshold = dur + } if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != "" { dur, err := time.ParseDuration(heartbeatGrace) diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 8347dc4c6..faf2474e2 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -71,6 +71,7 @@ server { node_gc_threshold = "12h" job_gc_threshold = "12h" eval_gc_threshold = "12h" + deployment_gc_threshold = "12h" heartbeat_grace = "30s" retry_join = [ "1.1.1.1", "2.2.2.2" ] start_join = [ "1.1.1.1", "2.2.2.2" ] diff --git a/command/agent/config.go b/command/agent/config.go index 02e1c9f51..b533a245b 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -270,6 +270,11 @@ type ServerConfig struct { // can be used to filter by age. EvalGCThreshold string `mapstructure:"eval_gc_threshold"` + // DeploymentGCThreshold controls how "old" a deployment must be to be + // collected by GC. Age is not the only requirement for a deployment to be + // GCed but the threshold can be used to filter by age. + DeploymentGCThreshold string `mapstructure:"deployment_gc_threshold"` + // HeartbeatGrace is the grace period beyond the TTL to account for network, // processing delays and clock skew before marking a node as "down". HeartbeatGrace string `mapstructure:"heartbeat_grace"` @@ -916,6 +921,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.EvalGCThreshold != "" { result.EvalGCThreshold = b.EvalGCThreshold } + if b.DeploymentGCThreshold != "" { + result.DeploymentGCThreshold = b.DeploymentGCThreshold + } if b.HeartbeatGrace != "" { result.HeartbeatGrace = b.HeartbeatGrace } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 1d0f387bf..f8011527b 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -504,6 +504,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { "node_gc_threshold", "eval_gc_threshold", "job_gc_threshold", + "deployment_gc_threshold", "heartbeat_grace", "start_join", "retry_join", diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 543cb046b..7d9b969cc 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -82,22 +82,23 @@ func TestConfig_Parse(t *testing.T) { NoHostUUID: helper.BoolToPtr(false), }, Server: &ServerConfig{ - Enabled: true, - BootstrapExpect: 5, - DataDir: "/tmp/data", - ProtocolVersion: 3, - NumSchedulers: 2, - EnabledSchedulers: []string{"test"}, - NodeGCThreshold: "12h", - EvalGCThreshold: "12h", - JobGCThreshold: "12h", - HeartbeatGrace: "30s", - RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, - StartJoin: []string{"1.1.1.1", "2.2.2.2"}, - RetryInterval: "15s", - RejoinAfterLeave: true, - RetryMaxAttempts: 3, - EncryptKey: "abc", + Enabled: true, + BootstrapExpect: 5, + DataDir: "/tmp/data", + ProtocolVersion: 3, + NumSchedulers: 2, + EnabledSchedulers: []string{"test"}, + NodeGCThreshold: "12h", + EvalGCThreshold: "12h", + JobGCThreshold: "12h", + DeploymentGCThreshold: "12h", + HeartbeatGrace: "30s", + RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, + StartJoin: []string{"1.1.1.1", "2.2.2.2"}, + RetryInterval: "15s", + RejoinAfterLeave: true, + RetryMaxAttempts: 3, + EncryptKey: "abc", }, Telemetry: &Telemetry{ StatsiteAddr: "127.0.0.1:1234", diff --git a/nomad/config.go b/nomad/config.go index 92e16b1e7..29161ee57 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -142,10 +142,18 @@ type Config struct { // NodeGCInterval is how often we dispatch a job to GC failed nodes. NodeGCInterval time.Duration - // NodeGCThreshold is how "old" a nodemust be to be eligible + // NodeGCThreshold is how "old" a node must be to be eligible // for GC. This gives users some time to view and debug a failed nodes. NodeGCThreshold time.Duration + // DeploymentGCInterval is how often we dispatch a job to GC terminal + // deployments. + DeploymentGCInterval time.Duration + + // DeploymentGCThreshold is how "old" a deployment must be to be eligible + // for GC. This gives users some time to view terminal deployments. + DeploymentGCThreshold time.Duration + // EvalNackTimeout controls how long we allow a sub-scheduler to // work on an evaluation before we consider it failed and Nack it. // This allows that evaluation to be handed to another sub-scheduler @@ -255,6 +263,8 @@ func DefaultConfig() *Config { JobGCThreshold: 4 * time.Hour, NodeGCInterval: 5 * time.Minute, NodeGCThreshold: 24 * time.Hour, + DeploymentGCInterval: 5 * time.Minute, + DeploymentGCThreshold: 1 * time.Hour, EvalNackTimeout: 60 * time.Second, EvalDeliveryLimit: 3, EvalNackInitialReenqueueDelay: 1 * time.Second, diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 15c6d9f6e..90f9863ad 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -76,6 +76,10 @@ server { evaluation must be in the terminal state before it is eligible for garbage collection. This is specified using a label suffix like "30s" or "1h". +- `deployment_gc_threshold` `(string: "1h")` - Specifies the minimum time a + deployment must be in the terminal state before it is eligible for garbage + collection. This is specified using a label suffix like "30s" or "1h". + - `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel scheduler threads to run. This can be as many as one per core, or `0` to disallow this server from making any scheduling decisions. This defaults to