Merge pull request #744 from hashicorp/f-on-restart

Restart on-success shouldn't be user specifiable
2016-02-02 17:40:05 -08:00 · 2016-02-02 17:40:05 -08:00 · 7a32ebcae1
parent db7997d08c 8aa3853399
commit 7a32ebcae1
13 changed files with 56 additions and 71 deletions
--- a/api/tasks.go
+++ b/api/tasks.go
@ -7,11 +7,10 @@ import (
 // RestartPolicy defines how the Nomad client restarts
 // tasks in a taskgroup when they fail
 type RestartPolicy struct {
-	Interval         time.Duration
-	Attempts         int
-	Delay            time.Duration
-	RestartOnSuccess bool
-	Mode             string
+	Interval time.Duration
+	Attempts int
+	Delay    time.Duration
+	Mode     string
 }

 // The ServiceCheck data model represents the consul health check that
--- a/client/alloc_runner.go
+++ b/client/alloc_runner.go
@ -115,7 +115,7 @@ func (r *AllocRunner) RestoreState() error {
 		r.restored[name] = struct{}{}

 		task := &structs.Task{Name: name}
-		restartTracker := newRestartTracker(r.RestartPolicy)
+		restartTracker := newRestartTracker(r.RestartPolicy, r.alloc.Job.Type)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, restartTracker, r.consulService)
 		r.tasks[name] = tr
@ -370,7 +370,7 @@ func (r *AllocRunner) Run() {

 		// Merge in the task resources
 		task.Resources = alloc.TaskResources[task.Name]
-		restartTracker := newRestartTracker(r.RestartPolicy)
+		restartTracker := newRestartTracker(r.RestartPolicy, r.alloc.Job.Type)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, restartTracker, r.consulService)
 		r.tasks[task.Name] = tr
--- a/client/alloc_runner_test.go
+++ b/client/alloc_runner_test.go
@ -33,7 +33,8 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
 	alloc := mock.Alloc()
 	consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 	if !restarts {
-		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0, RestartOnSuccess: false}
+		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
+		alloc.Job.Type = structs.JobTypeBatch
 	}

 	ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient)
--- a/client/restarts.go
+++ b/client/restarts.go
@ -10,9 +10,14 @@ import (
 // jitter is the percent of jitter added to restart delays.
 const jitter = 0.25

-func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
+func newRestartTracker(policy *structs.RestartPolicy, jobType string) *RestartTracker {
+	onSuccess := true
+	if jobType == structs.JobTypeBatch {
+		onSuccess = false
+	}
 	return &RestartTracker{
 		startTime: time.Now(),
+		onSuccess: onSuccess,
 		policy:    policy,
 		rand:      rand.New(rand.NewSource(time.Now().Unix())),
 	}
@ -20,6 +25,7 @@ func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker {

 type RestartTracker struct {
 	count     int       // Current number of attempts.
+	onSuccess bool      // Whether to restart on successful exit code.
 	startTime time.Time // When the interval began
 	policy    *structs.RestartPolicy
 	rand      *rand.Rand
@ -57,9 +63,9 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
 }

 // shouldRestart returns whether a restart should occur based on the exit code
-// and the RestartOnSuccess configuration.
+// and job type.
 func (r *RestartTracker) shouldRestart(exitCode int) bool {
-	return exitCode != 0 || r.policy.RestartOnSuccess
+	return exitCode != 0 || r.onSuccess
 }

 // jitter returns the delay time plus a jitter.
@ -77,5 +83,5 @@ func (r *RestartTracker) jitter() time.Duration {
 // Returns a tracker that never restarts.
 func noRestartsTracker() *RestartTracker {
 	policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
-	return newRestartTracker(policy)
+	return newRestartTracker(policy, structs.JobTypeBatch)
 }
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@ -9,11 +9,10 @@ import (

 func testPolicy(success bool, mode string) *structs.RestartPolicy {
 	return &structs.RestartPolicy{
-		Interval:         2 * time.Minute,
-		Delay:            1 * time.Second,
-		Attempts:         3,
-		Mode:             mode,
-		RestartOnSuccess: success,
+		Interval: 2 * time.Minute,
+		Delay:    1 * time.Second,
+		Attempts: 3,
+		Mode:     mode,
 	}
 }

@ -27,7 +26,7 @@ func withinJitter(expected, actual time.Duration) bool {
 func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeDelay)
-	rt := newRestartTracker(p)
+	rt := newRestartTracker(p, structs.JobTypeService)
 	for i := 0; i < p.Attempts; i++ {
 		actual, when := rt.NextRestart(127)
 		if !actual {
@ -53,7 +52,7 @@ func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 func TestClient_RestartTracker_ModeFail(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeFail)
-	rt := newRestartTracker(p)
+	rt := newRestartTracker(p, structs.JobTypeSystem)
 	for i := 0; i < p.Attempts; i++ {
 		actual, when := rt.NextRestart(127)
 		if !actual {
@ -73,7 +72,7 @@ func TestClient_RestartTracker_ModeFail(t *testing.T) {
 func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(false, structs.RestartPolicyModeDelay)
-	rt := newRestartTracker(p)
+	rt := newRestartTracker(p, structs.JobTypeBatch)
 	if shouldRestart, _ := rt.NextRestart(0); shouldRestart {
 		t.Fatalf("NextRestart() returned %v, expected: %v", shouldRestart, false)
 	}
@ -83,7 +82,7 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeFail)
 	p.Attempts = 0
-	rt := newRestartTracker(p)
+	rt := newRestartTracker(p, structs.JobTypeService)
 	if actual, when := rt.NextRestart(1); actual {
 		t.Fatalf("expect no restart, got restart/delay: %v", when)
 	}
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@ -49,7 +49,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {

 	ctx := driver.NewExecContext(allocDir, alloc.ID)
 	rp := structs.NewRestartPolicy(structs.JobTypeService)
-	restartTracker := newRestartTracker(rp)
+	restartTracker := newRestartTracker(rp, alloc.Job.Type)
 	if !restarts {
 		restartTracker = noRestartsTracker()
 	}
--- a/command/init.go
+++ b/command/init.go
@ -114,9 +114,6 @@ job "example" {
 			# A delay between a task failing and a restart occuring.
 			delay = "25s"

-			# Whether the tasks should be restarted if the exit successfully.
-			on_success = true
-
 			# Mode controls what happens when a task has restarted "attempts"
 			# times within the interval. "delay" mode delays the next restart
 			# till the next interval. "fail" mode does not restart the task if
--- a/jobspec/parse_test.go
+++ b/jobspec/parse_test.go
@ -78,11 +78,10 @@ func TestParse(t *testing.T) {
 							"elb_checks":   "3",
 						},
 						RestartPolicy: &structs.RestartPolicy{
-							Interval:         10 * time.Minute,
-							Attempts:         5,
-							Delay:            15 * time.Second,
-							RestartOnSuccess: true,
-							Mode:             "delay",
+							Interval: 10 * time.Minute,
+							Attempts: 5,
+							Delay:    15 * time.Second,
+							Mode:     "delay",
 						},
 						Tasks: []*structs.Task{
 							&structs.Task{
@ -114,7 +113,7 @@ func TestParse(t *testing.T) {
 									CPU:      500,
 									MemoryMB: 128,
 									DiskMB:   10,
-									IOPS:     1,
+									IOPS:     0,
 									Networks: []*structs.NetworkResource{
 										&structs.NetworkResource{
 											MBits:         100,
--- a/jobspec/test-fixtures/basic.hcl
+++ b/jobspec/test-fixtures/basic.hcl
@ -35,7 +35,6 @@ job "binstore-storagelocker" {
            attempts = 5
            interval = "10m"
            delay = "15s"
-            on_success = true
            mode = "delay"
        }
        task "binstore" {
--- a/nomad/mock/mock.go
+++ b/nomad/mock/mock.go
@ -77,11 +77,10 @@ func Job() *structs.Job {
 				Name:  "web",
 				Count: 10,
 				RestartPolicy: &structs.RestartPolicy{
-					Attempts:         3,
-					Interval:         10 * time.Minute,
-					Delay:            1 * time.Minute,
-					RestartOnSuccess: true,
-					Mode:             structs.RestartPolicyModeDelay,
+					Attempts: 3,
+					Interval: 10 * time.Minute,
+					Delay:    1 * time.Minute,
+					Mode:     structs.RestartPolicyModeDelay,
 				},
 				Tasks: []*structs.Task{
 					&structs.Task{
@ -156,11 +155,10 @@ func SystemJob() *structs.Job {
 				Name:  "web",
 				Count: 1,
 				RestartPolicy: &structs.RestartPolicy{
-					Attempts:         3,
-					Interval:         10 * time.Minute,
-					Delay:            1 * time.Minute,
-					RestartOnSuccess: true,
-					Mode:             structs.RestartPolicyModeDelay,
+					Attempts: 3,
+					Interval: 10 * time.Minute,
+					Delay:    1 * time.Minute,
+					Mode:     structs.RestartPolicyModeDelay,
 				},
 				Tasks: []*structs.Task{
 					&structs.Task{
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@ -1146,18 +1146,16 @@ type PeriodicLaunch struct {

 var (
 	defaultServiceJobRestartPolicy = RestartPolicy{
-		Delay:            15 * time.Second,
-		Attempts:         2,
-		Interval:         1 * time.Minute,
-		RestartOnSuccess: true,
-		Mode:             RestartPolicyModeDelay,
+		Delay:    15 * time.Second,
+		Attempts: 2,
+		Interval: 1 * time.Minute,
+		Mode:     RestartPolicyModeDelay,
 	}
 	defaultBatchJobRestartPolicy = RestartPolicy{
-		Delay:            15 * time.Second,
-		Attempts:         15,
-		Interval:         7 * 24 * time.Hour,
-		RestartOnSuccess: false,
-		Mode:             RestartPolicyModeDelay,
+		Delay:    15 * time.Second,
+		Attempts: 15,
+		Interval: 7 * 24 * time.Hour,
+		Mode:     RestartPolicyModeDelay,
 	}
 )

@ -1183,10 +1181,6 @@ type RestartPolicy struct {
 	// Delay is the time between a failure and a restart.
 	Delay time.Duration

-	// RestartOnSuccess determines whether a task should be restarted if it
-	// exited successfully.
-	RestartOnSuccess bool `mapstructure:"on_success"`
-
 	// Mode controls what happens when the task restarts more than attempt times
 	// in an interval.
 	Mode string
--- a/nomad/structs/structs_test.go
+++ b/nomad/structs/structs_test.go
@ -189,11 +189,10 @@ func TestJob_IsPeriodic(t *testing.T) {
 func TestTaskGroup_Validate(t *testing.T) {
 	tg := &TaskGroup{
 		RestartPolicy: &RestartPolicy{
-			Interval:         5 * time.Minute,
-			Delay:            10 * time.Second,
-			Attempts:         10,
-			RestartOnSuccess: true,
-			Mode:             RestartPolicyModeDelay,
+			Interval: 5 * time.Minute,
+			Delay:    10 * time.Second,
+			Attempts: 10,
+			Mode:     RestartPolicyModeDelay,
 		},
 	}
 	err := tg.Validate()
@ -217,11 +216,10 @@ func TestTaskGroup_Validate(t *testing.T) {
 			&Task{},
 		},
 		RestartPolicy: &RestartPolicy{
-			Interval:         5 * time.Minute,
-			Delay:            10 * time.Second,
-			Attempts:         10,
-			RestartOnSuccess: true,
-			Mode:             RestartPolicyModeDelay,
+			Interval: 5 * time.Minute,
+			Delay:    10 * time.Second,
+			Attempts: 10,
+			Mode:     RestartPolicyModeDelay,
 		},
 	}
 	err = tg.Validate()
--- a/website/source/docs/jobspec/index.html.md
+++ b/website/source/docs/jobspec/index.html.md
@ -309,9 +309,6 @@ The `restart` object supports the following keys:
  time duration using the `s`, `m`, and `h` suffixes, such as `30s`. A random
  jitter of up to 25% is added to the the delay.

-* `on_success` - `on_success` controls whether a task is restarted when the
-  task exits successfully.
-
 *   `mode` - Controls the behavior when the task fails more than `attempts`
    times in an interval. Possible values are listed below:

@ -327,7 +324,6 @@ restart {
    attempts = 15
    delay = "15s"
    interval = "168h" # 7 days
-    on_success = false
    mode = "delay"
 }
 ```
@ -339,7 +335,6 @@ restart {
    interval = "1m"
    attempts = 2
    delay = "15s"
-    on_success = true
    mode = "delay"
 }
 ```