From fcaee471a0e57dac4937ba0c614192f672e261f2 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 23 Mar 2018 11:16:58 -0700 Subject: [PATCH] client: always mark exited sys/svc allocs as failed When restarts.attempts=0 was set in a jobspec a system or service alloc that exited with 0 status would be marked as `completed` instead of `failed`. Since system and service jobs are intended to run until stopped or updated, they should always be marked as failed when they exit even in cases where the exit code is 0. --- client/restarts.go | 7 ++++++- client/restarts_test.go | 25 ++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/client/restarts.go b/client/restarts.go index 5fd3216ce..ebdc62cff 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -127,10 +127,15 @@ func (r *RestartTracker) GetState() (string, time.Duration) { // Hot path if no attempts are expected if r.policy.Attempts == 0 { r.reason = ReasonNoRestartsAllowed - if r.waitRes != nil && r.waitRes.Successful() { + + // If the task does not restart on a successful exit code and + // the exit code was successful: terminate. + if !r.onSuccess && r.waitRes != nil && r.waitRes.Successful() { return structs.TaskTerminated, 0 } + // Task restarts even on a successful exit code but no restarts + // allowed. return structs.TaskNotRestarting, 0 } diff --git a/client/restarts_test.go b/client/restarts_test.go index b0cad5b1a..915902e04 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -88,9 +88,32 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) { t.Parallel() p := testPolicy(true, structs.RestartPolicyModeFail) p.Attempts = 0 + + // Test with a non-zero exit code rt := newRestartTracker(p, structs.JobTypeService) if state, when := rt.SetWaitResult(testWaitResult(1)).GetState(); state != structs.TaskNotRestarting { - t.Fatalf("expect no restart, got restart/delay: %v", when) + t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when) + } + + // Even with a zero (successful) exit code non-batch jobs should exit + // with TaskNotRestarting + rt = newRestartTracker(p, structs.JobTypeService) + if state, when := rt.SetWaitResult(testWaitResult(0)).GetState(); state != structs.TaskNotRestarting { + t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when) + } + + // Batch jobs with a zero exit code and 0 attempts *do* exit cleanly + // with Terminated + rt = newRestartTracker(p, structs.JobTypeBatch) + if state, when := rt.SetWaitResult(testWaitResult(0)).GetState(); state != structs.TaskTerminated { + t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when) + } + + // Batch jobs with a non-zero exit code and 0 attempts exit with + // TaskNotRestarting + rt = newRestartTracker(p, structs.JobTypeBatch) + if state, when := rt.SetWaitResult(testWaitResult(1)).GetState(); state != structs.TaskNotRestarting { + t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when) } }