diff --git a/CHANGELOG.md b/CHANGELOG.md index b04c6b7e8..df57d779f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__ * discovery: Prevent absolute URLs in check paths. The documentation indicated that absolute URLs are not allowed, but it was not enforced. Absolute URLs in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)] + * jobspec: The default values for restart policy have changed. Restart policy mode defaults to "fail" and the + attempts/time interval values have been changed to enable faster server side rescheduling. See + [restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) for more information. IMPROVEMENTS: * core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)] diff --git a/api/tasks.go b/api/tasks.go index 95a01eb72..cff892489 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -340,17 +340,17 @@ func (g *TaskGroup) Canonicalize(job *Job) { switch *job.Type { case "service", "system": defaultRestartPolicy = &RestartPolicy{ - Delay: helper.TimeToPtr(15 * time.Second), - Attempts: helper.IntToPtr(2), - Interval: helper.TimeToPtr(1 * time.Minute), - Mode: helper.StringToPtr("delay"), + Delay: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay), + Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval), + Mode: helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode), } default: defaultRestartPolicy = &RestartPolicy{ - Delay: helper.TimeToPtr(15 * time.Second), - Attempts: helper.IntToPtr(15), - Interval: helper.TimeToPtr(7 * 24 * time.Hour), - Mode: helper.StringToPtr("delay"), 
+ Delay: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay), + Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts), + Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval), + Mode: helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode), } } diff --git a/command/init.go b/command/init.go index 519ea8dff..28b341e88 100644 --- a/command/init.go +++ b/command/init.go @@ -183,18 +183,18 @@ job "example" { # restart { # The number of attempts to run the job within the specified interval. - attempts = 10 - interval = "5m" + attempts = 2 + interval = "30m" # The "delay" parameter specifies the duration to wait before restarting # a task after it has failed. - delay = "25s" + delay = "15s" # The "mode" parameter controls what happens when a task has restarted # "attempts" times within the interval. "delay" mode delays the next # restart until the next interval. "fail" mode does not restart the task # if "attempts" has been hit within the interval. 
- mode = "delay" + mode = "fail" } # The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2a894d7a3..bcc408074 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2510,17 +2510,17 @@ func (d *DispatchPayloadConfig) Validate() error { } var ( - defaultServiceJobRestartPolicy = RestartPolicy{ + DefaultServiceJobRestartPolicy = RestartPolicy{ Delay: 15 * time.Second, Attempts: 2, - Interval: 1 * time.Minute, - Mode: RestartPolicyModeDelay, + Interval: 30 * time.Minute, + Mode: RestartPolicyModeFail, } - defaultBatchJobRestartPolicy = RestartPolicy{ + DefaultBatchJobRestartPolicy = RestartPolicy{ Delay: 15 * time.Second, - Attempts: 15, - Interval: 7 * 24 * time.Hour, - Mode: RestartPolicyModeDelay, + Attempts: 3, + Interval: 24 * time.Hour, + Mode: RestartPolicyModeFail, } ) @@ -2604,10 +2604,10 @@ func (r *RestartPolicy) Validate() error { func NewRestartPolicy(jobType string) *RestartPolicy { switch jobType { case JobTypeService, JobTypeSystem: - rp := defaultServiceJobRestartPolicy + rp := DefaultServiceJobRestartPolicy return &rp case JobTypeBatch: - rp := defaultBatchJobRestartPolicy + rp := DefaultBatchJobRestartPolicy return &rp } return nil diff --git a/website/source/api/json-jobs.html.md b/website/source/api/json-jobs.html.md index 25251c78d..e705bfa07 100644 --- a/website/source/api/json-jobs.html.md +++ b/website/source/api/json-jobs.html.md @@ -91,10 +91,10 @@ Below is the JSON representation of the job outputted by `$ nomad init`: "Leader": false }], "RestartPolicy": { - "Interval": 300000000000, - "Attempts": 10, - "Delay": 25000000000, - "Mode": "delay" + "Interval": 1800000000000, + "Attempts": 2, + "Delay": 15000000000, + "Mode": "fail" }, "EphemeralDisk": { "SizeMB": 300 diff --git a/website/source/docs/job-specification/restart.html.md b/website/source/docs/job-specification/restart.html.md index 13e694a40..967fd033b 100644 --- 
a/website/source/docs/job-specification/restart.html.md +++ b/website/source/docs/job-specification/restart.html.md @@ -17,7 +17,8 @@ description: |- -The `restart` stanza configures a group's behavior on task failure. +The `restart` stanza configures a group's behavior on task failure. Restarts +happen on the client that is running the task. ```hcl job "docs" { @@ -62,7 +63,7 @@ defaults by job type: - attempts = 15 + attempts = 3 delay = "15s" - interval = "168h" + interval = "24h" - mode = "delay" + mode = "fail" } ``` @@ -73,7 +74,7 @@ defaults by job type: - interval = "1m" + interval = "30m" attempts = 2 delay = "15s" - mode = "delay" + mode = "fail" } ``` 