Merge pull request #1405 from novilabs/delay-on-startup-failure
do not fail for multiple startup failures, delay instead
This commit is contained in:
commit
9085b5ca0d
|
@ -132,9 +132,15 @@ func (r *RestartTracker) handleStartError() (string, time.Duration) {
|
|||
}
|
||||
|
||||
if r.count > r.policy.Attempts {
|
||||
r.reason = fmt.Sprintf("Exceeded allowed attempts %d in interval %v",
|
||||
r.policy.Attempts, r.policy.Interval)
|
||||
return structs.TaskNotRestarting, 0
|
||||
if r.policy.Mode == structs.RestartPolicyModeFail {
|
||||
r.reason = fmt.Sprintf(
|
||||
`Exceeded allowed atttempts %d in interval %v and mode is "fail"`,
|
||||
r.policy.Attempts, r.policy.Interval)
|
||||
return structs.TaskNotRestarting, 0
|
||||
} else {
|
||||
r.reason = ReasonDelay
|
||||
return structs.TaskRestarting, r.getDelay()
|
||||
}
|
||||
}
|
||||
|
||||
r.reason = ReasonWithinPolicy
|
||||
|
|
|
@ -94,9 +94,9 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
|
||||
func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
|
||||
t.Parallel()
|
||||
p := testPolicy(true, structs.RestartPolicyModeDelay)
|
||||
p := testPolicy(true, structs.RestartPolicyModeFail)
|
||||
rt := newRestartTracker(p, structs.JobTypeSystem)
|
||||
recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
|
||||
for i := 0; i < p.Attempts; i++ {
|
||||
|
@ -114,3 +114,28 @@ func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
|
|||
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
|
||||
t.Parallel()
|
||||
p := testPolicy(true, structs.RestartPolicyModeDelay)
|
||||
rt := newRestartTracker(p, structs.JobTypeSystem)
|
||||
recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
|
||||
for i := 0; i < p.Attempts; i++ {
|
||||
state, when := rt.SetStartError(recErr).GetState()
|
||||
if state != structs.TaskRestarting {
|
||||
t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
|
||||
}
|
||||
if !withinJitter(p.Delay, when) {
|
||||
t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
|
||||
}
|
||||
}
|
||||
|
||||
// Next restart should cause delay
|
||||
state, when := rt.SetStartError(recErr).GetState()
|
||||
if state != structs.TaskRestarting {
|
||||
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
|
||||
}
|
||||
if !(when > p.Delay && when <= p.Interval) {
|
||||
t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue