2018-06-11 20:33:18 +00:00
|
|
|
package restarts
|
2015-11-05 19:12:31 +00:00
|
|
|
|
|
|
|
import (
|
2016-02-29 00:56:05 +00:00
|
|
|
"fmt"
|
2015-11-05 19:12:31 +00:00
|
|
|
"testing"
|
|
|
|
"time"
|
2015-12-18 20:17:13 +00:00
|
|
|
|
2019-12-11 20:21:39 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2018-10-10 03:05:22 +00:00
|
|
|
"github.com/hashicorp/nomad/plugins/drivers"
|
2015-11-05 19:12:31 +00:00
|
|
|
)
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
func testPolicy(success bool, mode string) *structs.RestartPolicy {
|
|
|
|
return &structs.RestartPolicy{
|
2016-02-02 23:08:07 +00:00
|
|
|
Interval: 2 * time.Minute,
|
|
|
|
Delay: 1 * time.Second,
|
|
|
|
Attempts: 3,
|
|
|
|
Mode: mode,
|
2015-12-18 20:17:13 +00:00
|
|
|
}
|
|
|
|
}
|
2015-11-06 01:13:25 +00:00
|
|
|
|
2015-12-18 20:11:12 +00:00
|
|
|
// withinJitter is a helper that returns whether the returned delay is within
|
|
|
|
// the jitter.
|
|
|
|
func withinJitter(expected, actual time.Duration) bool {
|
|
|
|
return float64((actual.Nanoseconds()-expected.Nanoseconds())/
|
|
|
|
expected.Nanoseconds()) <= jitter
|
|
|
|
}
|
|
|
|
|
2018-10-10 03:05:22 +00:00
|
|
|
func testExitResult(exit int) *drivers.ExitResult {
|
|
|
|
return &drivers.ExitResult{
|
|
|
|
ExitCode: exit,
|
|
|
|
}
|
2016-02-29 00:56:05 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
func TestClient_RestartTracker_ModeDelay(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeDelay)
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeService, nil)
|
2015-12-18 20:17:13 +00:00
|
|
|
for i := 0; i < p.Attempts; i++ {
|
2018-10-10 03:05:22 +00:00
|
|
|
state, when := rt.SetExitResult(testExitResult(127)).GetState()
|
2016-02-29 00:56:05 +00:00
|
|
|
if state != structs.TaskRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
|
2015-11-06 01:13:25 +00:00
|
|
|
}
|
2015-12-18 20:11:12 +00:00
|
|
|
if !withinJitter(p.Delay, when) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
|
2015-11-06 01:13:25 +00:00
|
|
|
}
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
2015-11-06 01:13:25 +00:00
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// Follow up restarts should cause delay.
|
2015-11-06 01:13:25 +00:00
|
|
|
for i := 0; i < 3; i++ {
|
2018-10-10 03:05:22 +00:00
|
|
|
state, when := rt.SetExitResult(testExitResult(127)).GetState()
|
2016-02-29 00:56:05 +00:00
|
|
|
if state != structs.TaskRestarting {
|
2015-11-06 01:13:25 +00:00
|
|
|
t.Fail()
|
|
|
|
}
|
2015-12-21 15:43:45 +00:00
|
|
|
if !(when > p.Delay && when <= p.Interval) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
|
2015-11-06 01:13:25 +00:00
|
|
|
}
|
|
|
|
}
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
func TestClient_RestartTracker_ModeFail(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
|
2015-12-18 20:17:13 +00:00
|
|
|
for i := 0; i < p.Attempts; i++ {
|
2018-10-10 03:05:22 +00:00
|
|
|
state, when := rt.SetExitResult(testExitResult(127)).GetState()
|
2016-02-29 00:56:05 +00:00
|
|
|
if state != structs.TaskRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
|
2015-11-06 01:30:41 +00:00
|
|
|
}
|
2015-12-18 20:11:12 +00:00
|
|
|
if !withinJitter(p.Delay, when) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
|
2015-11-06 01:30:41 +00:00
|
|
|
}
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
2015-12-18 20:17:13 +00:00
|
|
|
|
|
|
|
// Next restart should cause fail
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, _ := rt.SetExitResult(testExitResult(127)).GetState(); state != structs.TaskNotRestarting {
|
2016-02-29 00:56:05 +00:00
|
|
|
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
|
|
|
}
|
2015-11-23 18:56:38 +00:00
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(false, structs.RestartPolicyModeDelay)
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeBatch, nil)
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, _ := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
|
2016-02-29 00:56:05 +00:00
|
|
|
t.Fatalf("NextRestart() returned %v, expected: %v", state, structs.TaskTerminated)
|
2015-11-23 18:56:38 +00:00
|
|
|
}
|
2016-02-02 22:17:39 +00:00
|
|
|
}
|
2015-11-23 18:56:38 +00:00
|
|
|
|
2016-02-02 22:17:39 +00:00
|
|
|
func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
|
|
|
p.Attempts = 0
|
2018-03-23 18:16:58 +00:00
|
|
|
|
|
|
|
// Test with a non-zero exit code
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeService, nil)
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
|
2018-03-23 18:16:58 +00:00
|
|
|
t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Even with a zero (successful) exit code non-batch jobs should exit
|
|
|
|
// with TaskNotRestarting
|
2019-12-11 20:21:39 +00:00
|
|
|
rt = NewRestartTracker(p, structs.JobTypeService, nil)
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskNotRestarting {
|
2018-03-23 18:16:58 +00:00
|
|
|
t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Batch jobs with a zero exit code and 0 attempts *do* exit cleanly
|
|
|
|
// with Terminated
|
2019-12-11 20:21:39 +00:00
|
|
|
rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
|
2018-03-23 18:16:58 +00:00
|
|
|
t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Batch jobs with a non-zero exit code and 0 attempts exit with
|
|
|
|
// TaskNotRestarting
|
2019-12-11 20:21:39 +00:00
|
|
|
rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
|
2018-10-10 03:05:22 +00:00
|
|
|
if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
|
2018-03-23 18:16:58 +00:00
|
|
|
t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
|
2016-02-02 22:17:39 +00:00
|
|
|
}
|
2015-11-23 18:56:38 +00:00
|
|
|
}
|
2016-02-29 00:56:05 +00:00
|
|
|
|
2018-07-16 21:37:27 +00:00
|
|
|
func TestClient_RestartTracker_TaskKilled(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
|
|
|
p.Attempts = 0
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeService, nil)
|
2018-07-16 21:37:27 +00:00
|
|
|
if state, when := rt.SetKilled().GetState(); state != structs.TaskKilled && when != 0 {
|
|
|
|
t.Fatalf("expect no restart; got %v %v", state, when)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-05 22:11:09 +00:00
|
|
|
func TestClient_RestartTracker_RestartTriggered(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
|
|
|
p.Attempts = 0
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeService, nil)
|
2017-09-14 22:27:39 +00:00
|
|
|
if state, when := rt.SetRestartTriggered(false).GetState(); state != structs.TaskRestarting && when != 0 {
|
2016-10-05 22:11:09 +00:00
|
|
|
t.Fatalf("expect restart immediately, got %v %v", state, when)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-15 21:54:37 +00:00
|
|
|
func TestClient_RestartTracker_RestartTriggered_Failure(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
|
|
|
p.Attempts = 1
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeService, nil)
|
2017-09-15 21:54:37 +00:00
|
|
|
if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskRestarting || when == 0 {
|
|
|
|
t.Fatalf("expect restart got %v %v", state, when)
|
|
|
|
}
|
|
|
|
if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskNotRestarting || when != 0 {
|
|
|
|
t.Fatalf("expect failed got %v %v", state, when)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-12 01:54:36 +00:00
|
|
|
func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
|
2016-02-29 00:56:05 +00:00
|
|
|
t.Parallel()
|
2016-07-12 01:54:36 +00:00
|
|
|
p := testPolicy(true, structs.RestartPolicyModeFail)
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
|
2016-10-23 01:08:30 +00:00
|
|
|
recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
|
2016-02-29 00:56:05 +00:00
|
|
|
for i := 0; i < p.Attempts; i++ {
|
|
|
|
state, when := rt.SetStartError(recErr).GetState()
|
|
|
|
if state != structs.TaskRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
|
|
|
|
}
|
|
|
|
if !withinJitter(p.Delay, when) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Next restart should cause fail
|
|
|
|
if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 01:54:36 +00:00
|
|
|
|
|
|
|
func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
|
2016-07-30 12:11:06 +00:00
|
|
|
t.Parallel()
|
|
|
|
p := testPolicy(true, structs.RestartPolicyModeDelay)
|
2019-12-11 20:21:39 +00:00
|
|
|
rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
|
2016-10-23 01:08:30 +00:00
|
|
|
recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
|
2016-07-30 12:11:06 +00:00
|
|
|
for i := 0; i < p.Attempts; i++ {
|
|
|
|
state, when := rt.SetStartError(recErr).GetState()
|
|
|
|
if state != structs.TaskRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
|
|
|
|
}
|
|
|
|
if !withinJitter(p.Delay, when) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 01:54:36 +00:00
|
|
|
|
2016-07-30 12:11:06 +00:00
|
|
|
// Next restart should cause delay
|
|
|
|
state, when := rt.SetStartError(recErr).GetState()
|
|
|
|
if state != structs.TaskRestarting {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
|
|
|
|
}
|
|
|
|
if !(when > p.Delay && when <= p.Interval) {
|
|
|
|
t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
|
|
|
|
}
|
2016-07-12 01:54:36 +00:00
|
|
|
}
|
2019-12-11 20:21:39 +00:00
|
|
|
|
|
|
|
func TestClient_RestartTracker_Lifecycle(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
2020-01-09 19:43:05 +00:00
|
|
|
testCase := []struct {
|
|
|
|
name string
|
|
|
|
taskLifecycleConfig *structs.TaskLifecycleConfig
|
|
|
|
jobType string
|
|
|
|
shouldRestartOnSuccess bool
|
|
|
|
shouldRestartOnFailure bool
|
2019-12-11 20:21:39 +00:00
|
|
|
}{
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "service job no lifecycle",
|
|
|
|
taskLifecycleConfig: nil,
|
|
|
|
jobType: structs.JobTypeService,
|
|
|
|
shouldRestartOnSuccess: true,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "batch job no lifecycle",
|
|
|
|
taskLifecycleConfig: nil,
|
|
|
|
jobType: structs.JobTypeBatch,
|
|
|
|
shouldRestartOnSuccess: false,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "service job w/ lifecycle completed",
|
|
|
|
taskLifecycleConfig: &structs.TaskLifecycleConfig{
|
2019-12-11 20:21:39 +00:00
|
|
|
Hook: structs.TaskLifecycleHookPrestart,
|
|
|
|
BlockUntil: structs.TaskLifecycleBlockUntilCompleted,
|
|
|
|
},
|
2020-01-09 19:43:05 +00:00
|
|
|
jobType: structs.JobTypeService,
|
|
|
|
shouldRestartOnSuccess: false,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "service job w/ lifecycle running",
|
|
|
|
taskLifecycleConfig: &structs.TaskLifecycleConfig{
|
2019-12-11 20:21:39 +00:00
|
|
|
Hook: structs.TaskLifecycleHookPrestart,
|
|
|
|
BlockUntil: structs.TaskLifecycleBlockUntilRunning,
|
|
|
|
},
|
2020-01-09 19:43:05 +00:00
|
|
|
jobType: structs.JobTypeService,
|
|
|
|
shouldRestartOnSuccess: true,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "batch job lifecycle completed",
|
|
|
|
taskLifecycleConfig: &structs.TaskLifecycleConfig{
|
2019-12-11 20:21:39 +00:00
|
|
|
Hook: structs.TaskLifecycleHookPrestart,
|
|
|
|
BlockUntil: structs.TaskLifecycleBlockUntilCompleted,
|
|
|
|
},
|
2020-01-09 19:43:05 +00:00
|
|
|
jobType: structs.JobTypeService,
|
|
|
|
shouldRestartOnSuccess: false,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
{
|
2020-01-09 19:43:05 +00:00
|
|
|
name: "batch job lifecycle running",
|
|
|
|
taskLifecycleConfig: &structs.TaskLifecycleConfig{
|
2019-12-11 20:21:39 +00:00
|
|
|
Hook: structs.TaskLifecycleHookPrestart,
|
|
|
|
BlockUntil: structs.TaskLifecycleBlockUntilRunning,
|
|
|
|
},
|
2020-01-09 19:43:05 +00:00
|
|
|
jobType: structs.JobTypeBatch,
|
|
|
|
shouldRestartOnSuccess: true,
|
|
|
|
shouldRestartOnFailure: true,
|
2019-12-11 20:21:39 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2020-01-09 19:43:05 +00:00
|
|
|
for _, testCase := range testCase {
|
|
|
|
t.Run(testCase.name, func(t *testing.T) {
|
|
|
|
restartPolicy := testPolicy(true, testCase.jobType)
|
|
|
|
restartTracker := NewRestartTracker(restartPolicy, testCase.jobType, testCase.taskLifecycleConfig)
|
2019-12-11 20:21:39 +00:00
|
|
|
|
2020-01-09 19:43:05 +00:00
|
|
|
state, _ := restartTracker.SetExitResult(testExitResult(0)).GetState()
|
|
|
|
if !testCase.shouldRestartOnSuccess {
|
2019-12-11 20:21:39 +00:00
|
|
|
require.Equal(t, structs.TaskTerminated, state)
|
|
|
|
} else {
|
|
|
|
require.Equal(t, structs.TaskRestarting, state)
|
|
|
|
}
|
|
|
|
|
2020-01-09 19:43:05 +00:00
|
|
|
state, _ = restartTracker.SetExitResult(testExitResult(127)).GetState()
|
|
|
|
if !testCase.shouldRestartOnFailure {
|
|
|
|
require.Equal(t, structs.TaskTerminated, state)
|
|
|
|
} else {
|
|
|
|
require.Equal(t, structs.TaskRestarting, state)
|
|
|
|
}
|
2019-12-11 20:21:39 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|