Merge pull request #3821 from hashicorp/f-restart-stanza-fail-default

Change the default mode for client side restarts to fail from delay
2018-02-01 16:39:04 -06:00 · 2018-02-01 16:39:04 -06:00 · 98f6872c85
parent 8ecb6ca91b 40cdc39b48
commit 98f6872c85
6 changed files with 32 additions and 28 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__
 * discovery: Prevent absolute URLs in check paths. The documentation indicated
   that absolute URLs are not allowed, but it was not enforced. Absolute URLs
   in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
+ * jobspec: The default values for restart policy have changed. Restart policy mode defaults to "fail" and the
+   attempts/time interval values have been changed to enable faster server side rescheduling. See
+   [restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) for more information.
 IMPROVEMENTS:
 * core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)]
--- a/api/tasks.go
+++ b/api/tasks.go
@ -340,17 +340,17 @@ func (g *TaskGroup) Canonicalize(job *Job) {
 	switch *job.Type {
 	case "service", "system":
 		defaultRestartPolicy = &RestartPolicy{
-			Delay:    helper.TimeToPtr(15 * time.Second),
+			Delay:    helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay),
-			Attempts: helper.IntToPtr(2),
+			Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts),
-			Interval: helper.TimeToPtr(1 * time.Minute),
+			Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval),
-			Mode:     helper.StringToPtr("delay"),
+			Mode:     helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode),
 		}
 	default:
 		defaultRestartPolicy = &RestartPolicy{
-			Delay:    helper.TimeToPtr(15 * time.Second),
+			Delay:    helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay),
-			Attempts: helper.IntToPtr(15),
+			Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts),
-			Interval: helper.TimeToPtr(7 * 24 * time.Hour),
+			Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval),
-			Mode:     helper.StringToPtr("delay"),
+			Mode:     helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode),
 		}
 	}
--- a/command/init.go
+++ b/command/init.go
@ -183,18 +183,18 @@ job "example" {
    #
    restart {
      # The number of attempts to run the job within the specified interval.
-      attempts = 10
+      attempts = 2
-      interval = "5m"
+      interval = "30m"
      # The "delay" parameter specifies the duration to wait before restarting
      # a task after it has failed.
-      delay = "25s"
+      delay = "15s"
     # The "mode" parameter controls what happens when a task has restarted
     # "attempts" times within the interval. "delay" mode delays the next
     # restart until the next interval. "fail" mode does not restart the task
     # if "attempts" has been hit within the interval.
-      mode = "delay"
+      mode = "fail"
    }
    # The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@ -2510,17 +2510,17 @@ func (d *DispatchPayloadConfig) Validate() error {
 }
 var (
-	defaultServiceJobRestartPolicy = RestartPolicy{
+	DefaultServiceJobRestartPolicy = RestartPolicy{
 		Delay:    15 * time.Second,
 		Attempts: 2,
-		Interval: 1 * time.Minute,
+		Interval: 30 * time.Minute,
-		Mode:     RestartPolicyModeDelay,
+		Mode:     RestartPolicyModeFail,
 	}
-	defaultBatchJobRestartPolicy = RestartPolicy{
+	DefaultBatchJobRestartPolicy = RestartPolicy{
 		Delay:    15 * time.Second,
-		Attempts: 15,
+		Attempts: 3,
-		Interval: 7 * 24 * time.Hour,
+		Interval: 24 * time.Hour,
-		Mode:     RestartPolicyModeDelay,
+		Mode:     RestartPolicyModeFail,
 	}
 )
@ -2604,10 +2604,10 @@ func (r *RestartPolicy) Validate() error {
 func NewRestartPolicy(jobType string) *RestartPolicy {
 	switch jobType {
 	case JobTypeService, JobTypeSystem:
-		rp := defaultServiceJobRestartPolicy
+		rp := DefaultServiceJobRestartPolicy
 		return &rp
 	case JobTypeBatch:
-		rp := defaultBatchJobRestartPolicy
+		rp := DefaultBatchJobRestartPolicy
 		return &rp
 	}
 	return nil
--- a/website/source/api/json-jobs.html.md
+++ b/website/source/api/json-jobs.html.md
@ -91,10 +91,10 @@ Below is the JSON representation of the job outputted by `$ nomad init`:
                "Leader": false
            }],
            "RestartPolicy": {
-                "Interval": 300000000000,
+                "Interval": 1800000000000,
-                "Attempts": 10,
+                "Attempts": 2,
-                "Delay": 25000000000,
+                "Delay": 15000000000,
-                "Mode": "delay"
+                "Mode": "fail"
            },
            "EphemeralDisk": {
                "SizeMB": 300
--- a/website/source/docs/job-specification/restart.html.md
+++ b/website/source/docs/job-specification/restart.html.md
@ -17,7 +17,8 @@ description: |-
  </tr>
 </table>
-The `restart` stanza configures a group's behavior on task failure.
+The `restart` stanza configures a group's behavior on task failure. Restarts
+happen on the client that is running the task.
 ```hcl
 job "docs" {
@ -62,7 +63,7 @@ defaults by job type:
      attempts = 15
      delay    = "15s"
      interval = "168h"
-      mode     = "delay"
+      mode     = "fail"
    }
    ```
@ -73,7 +74,7 @@ defaults by job type:
      interval = "1m"
      attempts = 2
      delay    = "15s"
-      mode     = "delay"
+      mode     = "fail"
    }
    ```