Merge pull request #1405 from novilabs/delay-on-startup-failure

do not fail for multiple startup failures, delay instead
This commit is contained in:
Alex Dadgar 2016-07-12 09:51:40 -06:00 committed by GitHub
commit 9085b5ca0d
2 changed files with 36 additions and 5 deletions

View File

@ -132,9 +132,15 @@ func (r *RestartTracker) handleStartError() (string, time.Duration) {
}
if r.count > r.policy.Attempts {
r.reason = fmt.Sprintf("Exceeded allowed attempts %d in interval %v",
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
if r.policy.Mode == structs.RestartPolicyModeFail {
r.reason = fmt.Sprintf(
`Exceeded allowed atttempts %d in interval %v and mode is "fail"`,
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
} else {
r.reason = ReasonDelay
return structs.TaskRestarting, r.getDelay()
}
}
r.reason = ReasonWithinPolicy

View File

@ -94,9 +94,9 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
}
}
func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
t.Parallel()
p := testPolicy(true, structs.RestartPolicyModeDelay)
p := testPolicy(true, structs.RestartPolicyModeFail)
rt := newRestartTracker(p, structs.JobTypeSystem)
recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
for i := 0; i < p.Attempts; i++ {
@ -114,3 +114,28 @@ func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
}
}
func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
t.Parallel()
p := testPolicy(true, structs.RestartPolicyModeDelay)
rt := newRestartTracker(p, structs.JobTypeSystem)
recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
for i := 0; i < p.Attempts; i++ {
state, when := rt.SetStartError(recErr).GetState()
if state != structs.TaskRestarting {
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
}
if !withinJitter(p.Delay, when) {
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
}
}
// Next restart should cause delay
state, when := rt.SetStartError(recErr).GetState()
if state != structs.TaskRestarting {
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
}
if !(when > p.Delay && when <= p.Interval) {
t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
}
}