From 31c3e1295789886c9743e1a794f2b3b737ec68b2 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Fri, 18 Dec 2015 12:17:13 -0800
Subject: [PATCH 01/10] merge

---
 api/tasks.go                              |  22 ++---
 client/alloc_runner.go                    |   4 +-
 client/restarts.go                        | 109 ++++++++--------------
 client/restarts_test.go                   | 104 ++++++++++-----------
 client/task_runner.go                     |   6 +-
 client/task_runner_test.go                |   2 +-
 command/init.go                           |  21 +++--
 jobspec/parse.go                          |  19 ++--
 jobspec/parse_test.go                     |  18 +---
 jobspec/test-fixtures/basic.hcl           |   2 +
 nomad/structs/structs.go                  | 102 +++++++++++++-------
 nomad/structs/structs_test.go             |   2 +-
 website/source/docs/jobspec/index.html.md |  31 ++++--
 13 files changed, 223 insertions(+), 219 deletions(-)

diff --git a/api/tasks.go b/api/tasks.go
index c378c222d..3691cc78b 100644
--- a/api/tasks.go
+++ b/api/tasks.go
@@ -7,17 +7,11 @@ import (
 // RestartPolicy defines how the Nomad client restarts
 // tasks in a taskgroup when they fail
 type RestartPolicy struct {
-	Interval time.Duration
-	Attempts int
-	Delay    time.Duration
-}
-
-func NewRestartPolicy() *RestartPolicy {
-	return &RestartPolicy{
-		Attempts: 10,
-		Interval: 3 * time.Minute,
-		Delay:    5 * time.Second,
-	}
+	Interval         time.Duration
+	Attempts         int
+	Delay            time.Duration
+	RestartOnSuccess bool
+	Mode             string
 }
 
 // The ServiceCheck data model represents the consul health check that
@@ -54,11 +48,9 @@ type TaskGroup struct {
 
 // NewTaskGroup creates a new TaskGroup.
 func NewTaskGroup(name string, count int) *TaskGroup {
-	restartPolicy := NewRestartPolicy()
 	return &TaskGroup{
-		Name:          name,
-		Count:         count,
-		RestartPolicy: restartPolicy,
+		Name:  name,
+		Count: count,
 	}
 }
 
diff --git a/client/alloc_runner.go b/client/alloc_runner.go
index 14b93dd0d..1b4a5ea0d 100644
--- a/client/alloc_runner.go
+++ b/client/alloc_runner.go
@@ -110,7 +110,7 @@ func (r *AllocRunner) RestoreState() error {
 		r.restored[name] = struct{}{}
 
 		task := &structs.Task{Name: name}
-		restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy)
+		restartTracker := NewRestartTracker(r.RestartPolicy)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker,
 			r.consulService)
@@ -322,7 +322,7 @@ func (r *AllocRunner) Run() {
 
 		// Merge in the task resources
 		task.Resources = alloc.TaskResources[task.Name]
-		restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy)
+		restartTracker := NewRestartTracker(r.RestartPolicy)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker,
 			r.consulService)
diff --git a/client/restarts.go b/client/restarts.go
index 7f42ad432..82dd10da9 100644
--- a/client/restarts.go
+++ b/client/restarts.go
@@ -6,84 +6,53 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
-// The errorCounter keeps track of the number of times a process has exited
-// It returns the duration after which a task is restarted
-// For Batch jobs, the interval is set to zero value since the takss
-// will be restarted only upto maxAttempts times
-type restartTracker interface {
-	nextRestart(exitCode int) (bool, time.Duration)
-}
-
-func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker {
-	switch jobType {
-	case structs.JobTypeService:
-		return &serviceRestartTracker{
-			maxAttempts: restartPolicy.Attempts,
-			startTime:   time.Now(),
-			interval:    restartPolicy.Interval,
-			delay:       restartPolicy.Delay,
-		}
-	default:
-		return &batchRestartTracker{
-			maxAttempts: restartPolicy.Attempts,
-			delay:       restartPolicy.Delay,
-		}
+func NewRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
+	return &RestartTracker{
+		startTime: time.Now(),
+		policy:    policy,
 	}
 }
 
-// noRestartsTracker returns a RestartTracker that never restarts.
-func noRestartsTracker() restartTracker {
-	return &batchRestartTracker{maxAttempts: 0}
+type RestartTracker struct {
+	count     int       // Current number of attempts.
+	startTime time.Time // When the interval began
+	policy    *structs.RestartPolicy
 }
 
-type batchRestartTracker struct {
-	maxAttempts int
-	delay       time.Duration
-
-	count int
-}
-
-func (b *batchRestartTracker) increment() {
-	b.count += 1
-}
-
-func (b *batchRestartTracker) nextRestart(exitCode int) (bool, time.Duration) {
-	if b.count < b.maxAttempts && exitCode > 0 {
-		b.increment()
-		return true, b.delay
-	}
-	return false, 0
-}
-
-type serviceRestartTracker struct {
-	maxAttempts int
-	delay       time.Duration
-	interval    time.Duration
-
-	count     int
-	startTime time.Time
-}
-
-func (s *serviceRestartTracker) increment() {
-	s.count += 1
-}
-
-func (s *serviceRestartTracker) nextRestart(exitCode int) (bool, time.Duration) {
-	defer s.increment()
-	windowEndTime := s.startTime.Add(s.interval)
+func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
+	// Check if we have entered a new interval.
+	end := r.startTime.Add(r.policy.Interval)
 	now := time.Now()
-	// If the window of restart is over we wait until the delay duration
-	if now.After(windowEndTime) {
-		s.count = 0
-		s.startTime = time.Now()
-		return true, s.delay
+	if now.After(end) {
+		r.count = 0
+		r.startTime = now
+		return true, r.policy.Delay
 	}
 
-	// If we are within the delay duration and didn't exhaust all retries
-	if s.count < s.maxAttempts {
-		return true, s.delay
+	r.count++
+
+	// If we are under the attempts, restart with delay.
+	if r.count <= r.policy.Attempts {
+		return r.shouldRestart(exitCode), r.policy.Delay
 	}
 
-	// If we exhausted all the retries and are withing the time window
-	return true, windowEndTime.Sub(now)
+	// Don't restart since mode is "fail"
+	if r.policy.Mode == structs.RestartPolicyModeFail {
+		return false, 0
+	}
+
+	// Apply an artifical wait to enter the next interval
+	return r.shouldRestart(exitCode), end.Sub(now)
+}
+
+// shouldRestart returns whether a restart should occur based on the exit code
+// and the RestartOnSuccess configuration.
+func (r *RestartTracker) shouldRestart(exitCode int) bool {
+	return exitCode != 0 || r.policy.RestartOnSuccess && exitCode == 0
+}
+
+// Returns a tracker that never restarts.
+func noRestartsTracker() *RestartTracker {
+	policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
+	return NewRestartTracker(policy)
 }
diff --git a/client/restarts_test.go b/client/restarts_test.go
index 2faca00ff..b30e418f3 100644
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@@ -1,78 +1,74 @@
 package client
 
 import (
-	"github.com/hashicorp/nomad/nomad/structs"
 	"testing"
 	"time"
+
+	"github.com/hashicorp/nomad/nomad/structs"
 )
 
-func TestTaskRunner_ServiceRestartCounter(t *testing.T) {
-	interval := 2 * time.Minute
-	delay := 1 * time.Second
-	attempts := 3
-	rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: delay})
+func testPolicy(success bool, mode string) *structs.RestartPolicy {
+	return &structs.RestartPolicy{
+		Interval:         2 * time.Minute,
+		Delay:            1 * time.Second,
+		Attempts:         3,
+		Mode:             mode,
+		RestartOnSuccess: success,
+	}
+}
 
-	for i := 0; i < attempts; i++ {
-		actual, when := rt.nextRestart(127)
+func TestClient_RestartTracker_ModeDelay(t *testing.T) {
+	t.Parallel()
+	p := testPolicy(true, structs.RestartPolicyModeDelay)
+	rt := NewRestartTracker(p)
+	for i := 0; i < p.Attempts; i++ {
+		actual, when := rt.NextRestart(127)
 		if !actual {
-			t.Fatalf("should restart returned %v, actual %v", actual, true)
+			t.Fatalf("NextRestart() returned %v, want %v", actual, true)
 		}
-		if when != delay {
-			t.Fatalf("nextRestart() returned %v; want %v", when, delay)
+		if when != p.Delay {
+			t.Fatalf("NextRestart() returned %v; want %v", when, p.Delay)
 		}
 	}
 
-	time.Sleep(1 * time.Second)
+	// Follow up restarts should cause delay.
 	for i := 0; i < 3; i++ {
-		actual, when := rt.nextRestart(127)
+		actual, when := rt.NextRestart(127)
 		if !actual {
 			t.Fail()
 		}
-		if !(when > delay && when < interval) {
-			t.Fatalf("nextRestart() returned %v; want less than %v and more than %v", when, interval, delay)
+		if !(when > p.Delay && when < p.Interval) {
+			t.Fatalf("NextRestart() returned %v; want less than %v and more than %v", when, p.Interval, p.Delay)
 		}
 	}
-
-}
-
-func TestTaskRunner_BatchRestartCounter(t *testing.T) {
-	attempts := 2
-	interval := 1 * time.Second
-	delay := 1 * time.Second
-	rt := newRestartTracker(structs.JobTypeBatch,
-		&structs.RestartPolicy{Attempts: attempts,
-			Interval: interval,
-			Delay:    delay,
-		},
-	)
-	for i := 0; i < attempts; i++ {
-		shouldRestart, when := rt.nextRestart(127)
-		if !shouldRestart {
-			t.Fatalf("should restart returned %v, actual %v", shouldRestart, true)
-		}
-		if when != delay {
-			t.Fatalf("Delay should be %v, actual: %v", delay, when)
-		}
-	}
-	actual, _ := rt.nextRestart(1)
-	if actual {
-		t.Fatalf("Expect %v, Actual: %v", false, actual)
-	}
 }
 
-func TestTaskRunner_BatchRestartOnSuccess(t *testing.T) {
-	attempts := 2
-	interval := 1 * time.Second
-	delay := 1 * time.Second
-	rt := newRestartTracker(structs.JobTypeBatch,
-		&structs.RestartPolicy{Attempts: attempts,
-			Interval: interval,
-			Delay:    delay,
-		},
-	)
-	shouldRestart, _ := rt.nextRestart(0)
-	if shouldRestart {
-		t.Fatalf("should restart returned %v, expected: %v", shouldRestart, false)
+func TestClient_RestartTracker_ModeFail(t *testing.T) {
+	t.Parallel()
+	p := testPolicy(true, structs.RestartPolicyModeFail)
+	rt := NewRestartTracker(p)
+	for i := 0; i < p.Attempts; i++ {
+		actual, when := rt.NextRestart(127)
+		if !actual {
+			t.Fatalf("NextRestart() returned %v, want %v", actual, true)
+		}
+		if when != p.Delay {
+			t.Fatalf("NextRestart() returned %v; want %v", when, p.Delay)
+		}
+	}
+
+	// Next restart should cause fail
+	if actual, _ := rt.NextRestart(127); actual {
+		t.Fail()
+	}
+}
+
+func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
+	t.Parallel()
+	p := testPolicy(false, structs.RestartPolicyModeDelay)
+	rt := NewRestartTracker(p)
+	if shouldRestart, _ := rt.NextRestart(0); shouldRestart {
+		t.Fatalf("NextRestart() returned %v, expected: %v", shouldRestart, false)
 	}
 
 }
diff --git a/client/task_runner.go b/client/task_runner.go
index 4f10900a3..55c920bd8 100644
--- a/client/task_runner.go
+++ b/client/task_runner.go
@@ -24,7 +24,7 @@ type TaskRunner struct {
 	logger         *log.Logger
 	ctx            *driver.ExecContext
 	alloc          *structs.Allocation
-	restartTracker restartTracker
+	restartTracker *RestartTracker
 	consulService  *ConsulService
 
 	task     *structs.Task
@@ -53,7 +53,7 @@ type TaskStateUpdater func(taskName string)
 func NewTaskRunner(logger *log.Logger, config *config.Config,
 	updater TaskStateUpdater, ctx *driver.ExecContext,
 	alloc *structs.Allocation, task *structs.Task, state *structs.TaskState,
-	restartTracker restartTracker, consulService *ConsulService) *TaskRunner {
+	restartTracker *RestartTracker, consulService *ConsulService) *TaskRunner {
 
 	tc := &TaskRunner{
 		config:         config,
@@ -280,7 +280,7 @@ func (r *TaskRunner) run() {
 		}
 
 		// Check if we should restart. If not mark task as dead and exit.
-		shouldRestart, when := r.restartTracker.nextRestart(waitRes.ExitCode)
+		shouldRestart, when := r.restartTracker.NextRestart(waitRes.ExitCode)
 		waitEvent := r.waitErrorToEvent(waitRes)
 		if !shouldRestart {
 			r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID)
diff --git a/client/task_runner_test.go b/client/task_runner_test.go
index f6b581eb7..4f1437052 100644
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@@ -42,7 +42,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {
 
 	ctx := driver.NewExecContext(allocDir, alloc.ID)
 	rp := structs.NewRestartPolicy(structs.JobTypeService)
-	restartTracker := newRestartTracker(structs.JobTypeService, rp)
+	restartTracker := NewRestartTracker(rp)
 	if !restarts {
 		restartTracker = noRestartsTracker()
 	}
diff --git a/command/init.go b/command/init.go
index 39cfbc4cd..cf2d0aa3b 100644
--- a/command/init.go
+++ b/command/init.go
@@ -104,15 +104,24 @@ job "example" {
 		# Defaults to 1
 		# count = 1
 
-		# Restart Policy - This block defines the restart policy for TaskGroups,
-		# the attempts value defines the number of restarts Nomad will do if Tasks
-		# in this TaskGroup fails in a rolling window of interval duration
-		# The delay value makes Nomad wait for that duration to restart after a Task
-		# fails or crashes.
+		# Configure the restart policy for the task group. If not provided, a
+		# default is used based on the job type.
 		restart {
-			interval = "5m"
+			# The number of attempts to run the job within the specified interval.
 			attempts = 10
+			interval = "5m"
+			
+			# A delay between a task failing and a restart occuring.
 			delay = "25s"
+
+			# Whether the tasks should be restarted if the exit successfully.
+			on_success = true
+
+			# Mode controls what happens when a task has restarted "attempts"
+			# times within the interval. "delay" mode delays the next restart
+			# till the next interval. "fail" mode does not restart the task if
+			# "attempts" has been hit within the interval.
+			mode = "delay"
 		}
 
 		# Define a task to run
diff --git a/jobspec/parse.go b/jobspec/parse.go
index 3559744fd..52df6e35b 100644
--- a/jobspec/parse.go
+++ b/jobspec/parse.go
@@ -159,10 +159,9 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error {
 		result.TaskGroups = make([]*structs.TaskGroup, len(tasks), len(tasks)*2)
 		for i, t := range tasks {
 			result.TaskGroups[i] = &structs.TaskGroup{
-				Name:          t.Name,
-				Count:         1,
-				Tasks:         []*structs.Task{t},
-				RestartPolicy: structs.NewRestartPolicy(result.Type),
+				Name:  t.Name,
+				Count: 1,
+				Tasks: []*structs.Task{t},
 			}
 		}
 	}
@@ -230,11 +229,10 @@ func parseGroups(result *structs.Job, list *ast.ObjectList) error {
 				return err
 			}
 		}
-		g.RestartPolicy = structs.NewRestartPolicy(result.Type)
 
 		// Parse restart policy
 		if o := listVal.Filter("restart"); len(o.Items) > 0 {
-			if err := parseRestartPolicy(g.RestartPolicy, o); err != nil {
+			if err := parseRestartPolicy(&g.RestartPolicy, o); err != nil {
 				return err
 			}
 		}
@@ -267,12 +265,9 @@ func parseGroups(result *structs.Job, list *ast.ObjectList) error {
 	return nil
 }
 
-func parseRestartPolicy(final *structs.RestartPolicy, list *ast.ObjectList) error {
+func parseRestartPolicy(final **structs.RestartPolicy, list *ast.ObjectList) error {
 	list = list.Elem()
-	if len(list.Items) == 0 {
-		return nil
-	}
-	if len(list.Items) != 1 {
+	if len(list.Items) > 1 {
 		return fmt.Errorf("only one 'restart' block allowed")
 	}
 
@@ -297,7 +292,7 @@ func parseRestartPolicy(final *structs.RestartPolicy, list *ast.ObjectList) erro
 		return err
 	}
 
-	*final = result
+	*final = &result
 	return nil
 }
 
diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go
index 4497348eb..df04e284c 100644
--- a/jobspec/parse_test.go
+++ b/jobspec/parse_test.go
@@ -48,11 +48,6 @@ func TestParse(t *testing.T) {
 					&structs.TaskGroup{
 						Name:  "outside",
 						Count: 1,
-						RestartPolicy: &structs.RestartPolicy{
-							Attempts: 2,
-							Interval: 1 * time.Minute,
-							Delay:    15 * time.Second,
-						},
 						Tasks: []*structs.Task{
 							&structs.Task{
 								Name:   "outside",
@@ -83,9 +78,11 @@ func TestParse(t *testing.T) {
 							"elb_checks":   "3",
 						},
 						RestartPolicy: &structs.RestartPolicy{
-							Interval: 10 * time.Minute,
-							Attempts: 5,
-							Delay:    15 * time.Second,
+							Interval:         10 * time.Minute,
+							Attempts:         5,
+							Delay:            15 * time.Second,
+							RestartOnSuccess: true,
+							Mode:             "delay",
 						},
 						Tasks: []*structs.Task{
 							&structs.Task{
@@ -271,11 +268,6 @@ func TestParse(t *testing.T) {
 					&structs.TaskGroup{
 						Name:  "bar",
 						Count: 1,
-						RestartPolicy: &structs.RestartPolicy{
-							Attempts: 2,
-							Interval: 1 * time.Minute,
-							Delay:    15 * time.Second,
-						},
 						Tasks: []*structs.Task{
 							&structs.Task{
 								Name:   "bar",
diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl
index 9696fdef8..549e380ef 100644
--- a/jobspec/test-fixtures/basic.hcl
+++ b/jobspec/test-fixtures/basic.hcl
@@ -35,6 +35,8 @@ job "binstore-storagelocker" {
             attempts = 5
             interval = "10m"
             delay = "15s"
+            on_success = true
+            mode = "delay"
         }
         task "binstore" {
             driver = "docker"
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index ec4f08986..bed272309 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -19,17 +19,8 @@ import (
 )
 
 var (
-	ErrNoLeader                    = fmt.Errorf("No cluster leader")
-	ErrNoRegionPath                = fmt.Errorf("No path to region")
-	defaultServiceJobRestartPolicy = RestartPolicy{
-		Delay:    15 * time.Second,
-		Attempts: 2,
-		Interval: 1 * time.Minute,
-	}
-	defaultBatchJobRestartPolicy = RestartPolicy{
-		Delay:    15 * time.Second,
-		Attempts: 15,
-	}
+	ErrNoLeader     = fmt.Errorf("No cluster leader")
+	ErrNoRegionPath = fmt.Errorf("No path to region")
 )
 
 type MessageType uint8
@@ -786,8 +777,9 @@ type Job struct {
 // InitFields is used to initialize fields in the Job. This should be called
 // when registering a Job.
 func (j *Job) InitFields() {
-	// Initialize the service block.
-	j.InitAllServiceFields()
+	for _, tg := range j.TaskGroups {
+		tg.InitFields(j)
+	}
 
 	// If the job is batch then make it GC.
 	if j.Type == JobTypeBatch {
@@ -795,16 +787,6 @@ func (j *Job) InitFields() {
 	}
 }
 
-// InitAllServiceFields traverses all Task Groups and makes them
-// interpolate Job, Task group and Task names in all Service names.
-// It also generates the check names if they are not set. This method also
-// generates Check and Service IDs
-func (j *Job) InitAllServiceFields() {
-	for _, tg := range j.TaskGroups {
-		tg.InitAllServiceFields(j.Name)
-	}
-}
-
 // Validate is used to sanity check a job input
 func (j *Job) Validate() error {
 	var mErr multierror.Error
@@ -984,15 +966,61 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
 	return time.Time{}
 }
 
-// RestartPolicy influences how Nomad restarts Tasks when they
-// crash or fail.
+var (
+	defaultServiceJobRestartPolicy = RestartPolicy{
+		Delay:            15 * time.Second,
+		Attempts:         2,
+		Interval:         1 * time.Minute,
+		RestartOnSuccess: true,
+		Mode:             RestartPolicyModeDelay,
+	}
+	defaultBatchJobRestartPolicy = RestartPolicy{
+		Delay:            15 * time.Second,
+		Attempts:         15,
+		Interval:         7 * 24 * time.Hour,
+		RestartOnSuccess: false,
+		Mode:             RestartPolicyModeDelay,
+	}
+)
+
+const (
+	// RestartPolicyModeDelay causes an artificial delay till the next interval is
+	// reached when the specified attempts have been reached in the interval.
+	RestartPolicyModeDelay = "delay"
+
+	// RestartPolicyModeFail causes a job to fail if the specified number of
+	// attempts are reached within an interval.
+	RestartPolicyModeFail = "fail"
+)
+
+// RestartPolicy configures how Tasks are restarted when they crash or fail.
 type RestartPolicy struct {
+	// Attempts is the number of restart that will occur in an interval.
 	Attempts int
+
+	// Interval is a duration in which we can limit the number of restarts
+	// within.
 	Interval time.Duration
-	Delay    time.Duration
+
+	// Delay is the time between a failure and a restart.
+	Delay time.Duration
+
+	// RestartOnSuccess determines whether a task should be restarted if it
+	// exited successfully.
+	RestartOnSuccess bool `mapstructure:"on_success"`
+
+	// Mode controls what happens when the task restarts more than attempt times
+	// in an interval.
+	Mode string
 }
 
 func (r *RestartPolicy) Validate() error {
+	switch r.Mode {
+	case RestartPolicyModeDelay, RestartPolicyModeFail:
+	default:
+		return fmt.Errorf("Unsupported restart mode: %q", r.Mode)
+	}
+
 	if r.Interval == 0 {
 		return nil
 	}
@@ -1040,12 +1068,15 @@ type TaskGroup struct {
 	Meta map[string]string
 }
 
-// InitAllServiceFields traverses over all Tasks and makes them to interpolate
-// values of Job, Task Group and Task names in all Service Names.
-// It also generates service ids, check ids and check names
-func (tg *TaskGroup) InitAllServiceFields(job string) {
+// InitFields is used to initialize fields in the TaskGroup.
+func (tg *TaskGroup) InitFields(job *Job) {
+	// Set the default restart policy.
+	if tg.RestartPolicy == nil {
+		tg.RestartPolicy = NewRestartPolicy(job.Type)
+	}
+
 	for _, task := range tg.Tasks {
-		task.InitAllServiceFields(job, tg.Name)
+		task.InitFields(job, tg)
 	}
 }
 
@@ -1240,10 +1271,15 @@ type Task struct {
 	Meta map[string]string
 }
 
-// InitAllServiceFields interpolates values of Job, Task Group
+// InitFields initializes fields in the task.
+func (t *Task) InitFields(job *Job, tg *TaskGroup) {
+	t.InitServiceFields(job.Name, tg.Name)
+}
+
+// InitServiceFields interpolates values of Job, Task Group
 // and Tasks in all the service Names of a Task. This also generates the service
 // id, check id and check names.
-func (t *Task) InitAllServiceFields(job string, taskGroup string) {
+func (t *Task) InitServiceFields(job string, taskGroup string) {
 	for _, service := range t.Services {
 		service.InitFields(job, taskGroup, t.Name)
 	}
diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go
index 5111ea4a5..981697d8b 100644
--- a/nomad/structs/structs_test.go
+++ b/nomad/structs/structs_test.go
@@ -505,7 +505,7 @@ func TestJob_ExpandServiceNames(t *testing.T) {
 		},
 	}
 
-	j.InitAllServiceFields()
+	j.InitFields()
 
 	service1Name := j.TaskGroups[0].Tasks[0].Services[0].Name
 	if service1Name != "my-job-web-frontend-default" {
diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md
index 99aefe2b4..19a97c180 100644
--- a/website/source/docs/jobspec/index.html.md
+++ b/website/source/docs/jobspec/index.html.md
@@ -236,26 +236,37 @@ The `network` object supports the following keys:
 
 The `restart` object supports the following keys:
 
-* `attempts` - For `batch` jobs, `attempts` is the maximum number of restarts
-  allowed before the task is failed. For non-batch jobs, the `attempts` is the
-  number of restarts allowed in an `interval` before a restart delay is added.
+* `attempts` - `attempts` is the number of restarts allowed in an `interval`.
 
-* `interval` - `interval` is only valid on non-batch jobs and is a time duration
-  that can be specified using the `s`, `m`, and `h` suffixes, such as `30s`.
-  The `interval` begins when the first task starts and ensures that only
-  `attempts` number of restarts happens within it. If more than `attempts`
-  number of failures happen, the restart is delayed till after the `interval`,
-  which is then reset.
+* `interval` - `interval` is a time duration that can be specified using the
+  `s`, `m`, and `h` suffixes, such as `30s`.  The `interval` begins when the
+  first task starts and ensures that only `attempts` number of restarts happens
+  within it. If more than `attempts` number of failures happen, behavior is
+  controlled by `mode`.
 
 * `delay` - A duration to wait before restarting a task. It is specified as a
   time duration using the `s`, `m`, and `h` suffixes, such as `30s`.
 
+* `on_success` - `on_success` controls whether a task is restarted when the
+  task exits successfully.
+
+*   `mode` - Controls the behavior when the task fails more than `attempts`
+    times in an interval. Possible values are listed below:
+
+    * `delay` - `delay` will delay the next restart until the next `interval` is
+      reached.
+
+    * `fail` - `fail` will not restart the task again.
+
 The default `batch` restart policy is:
 
 ```
 restart {
     attempts = 15
     delay = "15s"
+    interval = "168h" # 7 days
+    on_success = false
+    mode = "delay"
 }
 ```
 
@@ -266,6 +277,8 @@ restart {
     interval = "1m"
     attempts = 2
     delay = "15s"
+    on_success = true
+    mode = "delay"
 }
 ```
 

From f1c41ed4cf8c111463f7c8b49f824cdee14ea811 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 15 Dec 2015 17:13:04 -0800
Subject: [PATCH 02/10] Fix mock

---
 nomad/mock/mock.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go
index 2e0057c8e..abdfc859f 100644
--- a/nomad/mock/mock.go
+++ b/nomad/mock/mock.go
@@ -126,7 +126,7 @@ func Job() *structs.Job {
 		CreateIndex: 42,
 		ModifyIndex: 99,
 	}
-	job.InitAllServiceFields()
+	job.InitFields()
 	return job
 }
 

From ea119b72975ef30499569a3b3a833d60155784b3 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 15 Dec 2015 17:35:55 -0800
Subject: [PATCH 03/10] Update restart policy in mock

---
 nomad/mock/mock.go | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go
index abdfc859f..b3a468d64 100644
--- a/nomad/mock/mock.go
+++ b/nomad/mock/mock.go
@@ -76,9 +76,11 @@ func Job() *structs.Job {
 				Name:  "web",
 				Count: 10,
 				RestartPolicy: &structs.RestartPolicy{
-					Attempts: 3,
-					Interval: 10 * time.Minute,
-					Delay:    1 * time.Minute,
+					Attempts:         3,
+					Interval:         10 * time.Minute,
+					Delay:            1 * time.Minute,
+					RestartOnSuccess: true,
+					Mode:             structs.RestartPolicyModeDelay,
 				},
 				Tasks: []*structs.Task{
 					&structs.Task{
@@ -151,9 +153,11 @@ func SystemJob() *structs.Job {
 				Name:  "web",
 				Count: 1,
 				RestartPolicy: &structs.RestartPolicy{
-					Attempts: 3,
-					Interval: 10 * time.Minute,
-					Delay:    1 * time.Minute,
+					Attempts:         3,
+					Interval:         10 * time.Minute,
+					Delay:            1 * time.Minute,
+					RestartOnSuccess: true,
+					Mode:             structs.RestartPolicyModeDelay,
 				},
 				Tasks: []*structs.Task{
 					&structs.Task{

From 55cc8c61b3fb361c6770d69fa2549139d8e7f4eb Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Fri, 18 Dec 2015 12:17:50 -0800
Subject: [PATCH 04/10] fix

---
 command/run.go                |  3 +++
 command/run_test.go           |  1 +
 command/validate.go           |  3 +++
 command/validate_test.go      |  1 +
 nomad/structs/structs_test.go | 16 ++++++++++------
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/command/run.go b/command/run.go
index 5b674b81b..d25692135 100644
--- a/command/run.go
+++ b/command/run.go
@@ -80,6 +80,9 @@ func (c *RunCommand) Run(args []string) int {
 		return 1
 	}
 
+	// Initialize any fields that need to be.
+	job.InitFields()
+
 	// Check that the job is valid
 	if err := job.Validate(); err != nil {
 		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
diff --git a/command/run_test.go b/command/run_test.go
index c797ef406..a61f598f2 100644
--- a/command/run_test.go
+++ b/command/run_test.go
@@ -77,6 +77,7 @@ func TestRunCommand_Fails(t *testing.T) {
 	defer os.Remove(fh3.Name())
 	_, err = fh3.WriteString(`
 job "job1" {
+	type = "service"
 	datacenters = [ "dc1" ]
 	group "group1" {
 		count = 1
diff --git a/command/validate.go b/command/validate.go
index 10d03eaca..56bb166d4 100644
--- a/command/validate.go
+++ b/command/validate.go
@@ -48,6 +48,9 @@ func (c *ValidateCommand) Run(args []string) int {
 		return 1
 	}
 
+	// Initialize any fields that need to be.
+	job.InitFields()
+
 	// Check that the job is valid
 	if err := job.Validate(); err != nil {
 		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
diff --git a/command/validate_test.go b/command/validate_test.go
index 7e61c19b6..606f75a4a 100644
--- a/command/validate_test.go
+++ b/command/validate_test.go
@@ -24,6 +24,7 @@ func TestValidateCommand(t *testing.T) {
 	defer os.Remove(fh.Name())
 	_, err = fh.WriteString(`
 job "job1" {
+	type = "service"
 	datacenters = [ "dc1" ]
 	group "group1" {
 		count = 1
diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go
index 981697d8b..2b74c3f1a 100644
--- a/nomad/structs/structs_test.go
+++ b/nomad/structs/structs_test.go
@@ -115,9 +115,11 @@ func TestJob_IsPeriodic(t *testing.T) {
 func TestTaskGroup_Validate(t *testing.T) {
 	tg := &TaskGroup{
 		RestartPolicy: &RestartPolicy{
-			Interval: 5 * time.Minute,
-			Delay:    10 * time.Second,
-			Attempts: 10,
+			Interval:         5 * time.Minute,
+			Delay:            10 * time.Second,
+			Attempts:         10,
+			RestartOnSuccess: true,
+			Mode:             RestartPolicyModeDelay,
 		},
 	}
 	err := tg.Validate()
@@ -141,9 +143,11 @@ func TestTaskGroup_Validate(t *testing.T) {
 			&Task{},
 		},
 		RestartPolicy: &RestartPolicy{
-			Interval: 5 * time.Minute,
-			Delay:    10 * time.Second,
-			Attempts: 10,
+			Interval:         5 * time.Minute,
+			Delay:            10 * time.Second,
+			Attempts:         10,
+			RestartOnSuccess: true,
+			Mode:             RestartPolicyModeDelay,
 		},
 	}
 	err = tg.Validate()

From 156d5845d18bf73ad6f9fe19d514b09d702829b5 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 15 Dec 2015 18:08:53 -0800
Subject: [PATCH 05/10] Fix api test

---
 api/compose_test.go | 1 -
 api/tasks_test.go   | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/api/compose_test.go b/api/compose_test.go
index bc76895a9..f4b21fcbd 100644
--- a/api/compose_test.go
+++ b/api/compose_test.go
@@ -69,7 +69,6 @@ func TestCompose(t *testing.T) {
 						Operand: "=",
 					},
 				},
-				RestartPolicy: NewRestartPolicy(),
 				Tasks: []*Task{
 					&Task{
 						Name:   "task1",
diff --git a/api/tasks_test.go b/api/tasks_test.go
index 75f29996d..bbdf12550 100644
--- a/api/tasks_test.go
+++ b/api/tasks_test.go
@@ -8,9 +8,8 @@ import (
 func TestTaskGroup_NewTaskGroup(t *testing.T) {
 	grp := NewTaskGroup("grp1", 2)
 	expect := &TaskGroup{
-		Name:          "grp1",
-		Count:         2,
-		RestartPolicy: NewRestartPolicy(),
+		Name:  "grp1",
+		Count: 2,
 	}
 	if !reflect.DeepEqual(grp, expect) {
 		t.Fatalf("expect: %#v, got: %#v", expect, grp)

From a5e9e2068c9dcd60e7563ea3e374d37ea49c7ee8 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Wed, 16 Dec 2015 14:39:37 -0800
Subject: [PATCH 06/10] Make NewRestartTracker private

---
 client/alloc_runner.go     | 4 ++--
 client/restarts.go         | 4 ++--
 client/restarts_test.go    | 6 +++---
 client/task_runner_test.go | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/client/alloc_runner.go b/client/alloc_runner.go
index 1b4a5ea0d..71de749c7 100644
--- a/client/alloc_runner.go
+++ b/client/alloc_runner.go
@@ -110,7 +110,7 @@ func (r *AllocRunner) RestoreState() error {
 		r.restored[name] = struct{}{}
 
 		task := &structs.Task{Name: name}
-		restartTracker := NewRestartTracker(r.RestartPolicy)
+		restartTracker := newRestartTracker(r.RestartPolicy)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker,
 			r.consulService)
@@ -322,7 +322,7 @@ func (r *AllocRunner) Run() {
 
 		// Merge in the task resources
 		task.Resources = alloc.TaskResources[task.Name]
-		restartTracker := NewRestartTracker(r.RestartPolicy)
+		restartTracker := newRestartTracker(r.RestartPolicy)
 		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx,
 			r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker,
 			r.consulService)
diff --git a/client/restarts.go b/client/restarts.go
index 82dd10da9..937a8c851 100644
--- a/client/restarts.go
+++ b/client/restarts.go
@@ -6,7 +6,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
-func NewRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
+func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
 	return &RestartTracker{
 		startTime: time.Now(),
 		policy:    policy,
@@ -54,5 +54,5 @@ func (r *RestartTracker) shouldRestart(exitCode int) bool {
 // Returns a tracker that never restarts.
 func noRestartsTracker() *RestartTracker {
 	policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
-	return NewRestartTracker(policy)
+	return newRestartTracker(policy)
 }
diff --git a/client/restarts_test.go b/client/restarts_test.go
index b30e418f3..04f2d362d 100644
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@@ -20,7 +20,7 @@ func testPolicy(success bool, mode string) *structs.RestartPolicy {
 func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeDelay)
-	rt := NewRestartTracker(p)
+	rt := newRestartTracker(p)
 	for i := 0; i < p.Attempts; i++ {
 		actual, when := rt.NextRestart(127)
 		if !actual {
@@ -46,7 +46,7 @@ func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 func TestClient_RestartTracker_ModeFail(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeFail)
-	rt := NewRestartTracker(p)
+	rt := newRestartTracker(p)
 	for i := 0; i < p.Attempts; i++ {
 		actual, when := rt.NextRestart(127)
 		if !actual {
@@ -66,7 +66,7 @@ func TestClient_RestartTracker_ModeFail(t *testing.T) {
 func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(false, structs.RestartPolicyModeDelay)
-	rt := NewRestartTracker(p)
+	rt := newRestartTracker(p)
 	if shouldRestart, _ := rt.NextRestart(0); shouldRestart {
 		t.Fatalf("NextRestart() returned %v, expected: %v", shouldRestart, false)
 	}
diff --git a/client/task_runner_test.go b/client/task_runner_test.go
index 4f1437052..eae7eb8e6 100644
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@@ -42,7 +42,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {
 
 	ctx := driver.NewExecContext(allocDir, alloc.ID)
 	rp := structs.NewRestartPolicy(structs.JobTypeService)
-	restartTracker := NewRestartTracker(rp)
+	restartTracker := newRestartTracker(rp)
 	if !restarts {
 		restartTracker = noRestartsTracker()
 	}

From 307fbef7194fd7a5cf78137841c635194bcb1531 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Thu, 17 Dec 2015 10:37:53 -0800
Subject: [PATCH 07/10] Add jitter

---
 client/restarts.go | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/client/restarts.go b/client/restarts.go
index 937a8c851..bf3921dba 100644
--- a/client/restarts.go
+++ b/client/restarts.go
@@ -1,6 +1,7 @@
 package client
 
 import (
+	"math/rand"
 	"time"
 
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -10,6 +11,7 @@ func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
 	return &RestartTracker{
 		startTime: time.Now(),
 		policy:    policy,
+		rand:      rand.New(rand.NewSource(time.Now().Unix())),
 	}
 }
 
@@ -17,6 +19,7 @@ type RestartTracker struct {
 	count     int       // Current number of attempts.
 	startTime time.Time // When the interval began
 	policy    *structs.RestartPolicy
+	rand      *rand.Rand
 }
 
 func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
@@ -26,14 +29,14 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
 	if now.After(end) {
 		r.count = 0
 		r.startTime = now
-		return true, r.policy.Delay
+		return true, r.jitter()
 	}
 
 	r.count++
 
 	// If we are under the attempts, restart with delay.
 	if r.count <= r.policy.Attempts {
-		return r.shouldRestart(exitCode), r.policy.Delay
+		return r.shouldRestart(exitCode), r.jitter()
 	}
 
 	// Don't restart since mode is "fail"
@@ -48,7 +51,14 @@ func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
 // shouldRestart returns whether a restart should occur based on the exit code
 // and the RestartOnSuccess configuration.
 func (r *RestartTracker) shouldRestart(exitCode int) bool {
-	return exitCode != 0 || r.policy.RestartOnSuccess && exitCode == 0
+	return exitCode != 0 || r.policy.RestartOnSuccess
+}
+
+// jitter returns the delay time plus a jitter.
+func (r *RestartTracker) jitter() time.Duration {
+	d := r.policy.Delay.Nanoseconds()
+	j := r.rand.Int63n(d) / 4 // Up to 25% jitter
+	return time.Duration(d + j)
 }
 
 // Returns a tracker that never restarts.

From 0a17dc6eca0ea458e89d24c2dc59fcb9f839c121 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Thu, 17 Dec 2015 16:39:47 -0800
Subject: [PATCH 08/10] Document jitter

---
 website/source/docs/jobspec/index.html.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md
index 19a97c180..f55251c2b 100644
--- a/website/source/docs/jobspec/index.html.md
+++ b/website/source/docs/jobspec/index.html.md
@@ -245,7 +245,8 @@ The `restart` object supports the following keys:
   controlled by `mode`.
 
 * `delay` - A duration to wait before restarting a task. It is specified as a
-  time duration using the `s`, `m`, and `h` suffixes, such as `30s`.
+  time duration using the `s`, `m`, and `h` suffixes, such as `30s`. A random
+  jitter of up to 25% is added to the the delay.
 
 * `on_success` - `on_success` controls whether a task is restarted when the
   task exits successfully.

From b0f321c2c76ce33e2f6d379f4d2fa350b8d42d5b Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Fri, 18 Dec 2015 12:11:12 -0800
Subject: [PATCH 09/10] Fix test because of jitter

---
 client/restarts.go      |  7 +++++--
 client/restarts_test.go | 15 +++++++++++----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/client/restarts.go b/client/restarts.go
index bf3921dba..6da456e83 100644
--- a/client/restarts.go
+++ b/client/restarts.go
@@ -7,6 +7,9 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
+// jitter is the percent of jitter added to restart delays.
+const jitter = 0.25
+
 func newRestartTracker(policy *structs.RestartPolicy) *RestartTracker {
 	return &RestartTracker{
 		startTime: time.Now(),
@@ -57,8 +60,8 @@ func (r *RestartTracker) shouldRestart(exitCode int) bool {
 // jitter returns the delay time plus a jitter.
 func (r *RestartTracker) jitter() time.Duration {
 	d := r.policy.Delay.Nanoseconds()
-	j := r.rand.Int63n(d) / 4 // Up to 25% jitter
-	return time.Duration(d + j)
+	j := float64(r.rand.Int63n(d)) * jitter
+	return time.Duration(d + int64(j))
 }
 
 // Returns a tracker that never restarts.
diff --git a/client/restarts_test.go b/client/restarts_test.go
index 04f2d362d..79e4d2a56 100644
--- a/client/restarts_test.go
+++ b/client/restarts_test.go
@@ -17,6 +17,13 @@ func testPolicy(success bool, mode string) *structs.RestartPolicy {
 	}
 }
 
+// withinJitter is a helper that returns whether the returned delay is within
+// the jitter.
+func withinJitter(expected, actual time.Duration) bool {
+	return float64((actual.Nanoseconds()-expected.Nanoseconds())/
+		expected.Nanoseconds()) <= jitter
+}
+
 func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 	t.Parallel()
 	p := testPolicy(true, structs.RestartPolicyModeDelay)
@@ -26,8 +33,8 @@ func TestClient_RestartTracker_ModeDelay(t *testing.T) {
 		if !actual {
 			t.Fatalf("NextRestart() returned %v, want %v", actual, true)
 		}
-		if when != p.Delay {
-			t.Fatalf("NextRestart() returned %v; want %v", when, p.Delay)
+		if !withinJitter(p.Delay, when) {
+			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
 		}
 	}
 
@@ -52,8 +59,8 @@ func TestClient_RestartTracker_ModeFail(t *testing.T) {
 		if !actual {
 			t.Fatalf("NextRestart() returned %v, want %v", actual, true)
 		}
-		if when != p.Delay {
-			t.Fatalf("NextRestart() returned %v; want %v", when, p.Delay)
+		if !withinJitter(p.Delay, when) {
+			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
 		}
 	}
 

From 423d16908ee0174825518c2216f299588c353add Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Fri, 18 Dec 2015 12:24:53 -0800
Subject: [PATCH 10/10] Go vet

---
 nomad/core_sched_test.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go
index 54c876051..4d71de5bc 100644
--- a/nomad/core_sched_test.go
+++ b/nomad/core_sched_test.go
@@ -192,7 +192,7 @@ func TestCoreScheduler_JobGC(t *testing.T) {
 		// Should still exist
 		out, err := state.JobByID(job.ID)
 		if err != nil {
-			t.Fatalf("test(%s) err: %v", err)
+			t.Fatalf("test(%s) err: %v", test.test, err)
 		}
 		if (test.shouldExist && out == nil) || (!test.shouldExist && out != nil) {
 			t.Fatalf("test(%s) bad: %v", test.test, out)
@@ -200,7 +200,7 @@ func TestCoreScheduler_JobGC(t *testing.T) {
 
 		outE, err := state.EvalByID(eval.ID)
 		if err != nil {
-			t.Fatalf("test(%s) err: %v", err)
+			t.Fatalf("test(%s) err: %v", test.test, err)
 		}
 		if (test.shouldExist && outE == nil) || (!test.shouldExist && outE != nil) {
 			t.Fatalf("test(%s) bad: %v", test.test, out)
@@ -208,7 +208,7 @@ func TestCoreScheduler_JobGC(t *testing.T) {
 
 		outA, err := state.AllocByID(alloc.ID)
 		if err != nil {
-			t.Fatalf("test(%s) err: %v", err)
+			t.Fatalf("test(%s) err: %v", test.test, err)
 		}
 		if (test.shouldExist && outA == nil) || (!test.shouldExist && outA != nil) {
 			t.Fatalf("test(%s) bad: %v", test.test, outA)