2015-11-05 19:12:31 +00:00
|
|
|
package client
|
|
|
|
|
|
|
|
import (
|
2015-12-17 18:37:53 +00:00
|
|
|
"math/rand"
|
2015-11-05 19:12:31 +00:00
|
|
|
"time"
|
2015-11-14 06:07:13 +00:00
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2015-11-05 19:12:31 +00:00
|
|
|
)
|
|
|
|
|
2015-12-18 20:11:12 +00:00
|
|
|
// jitter is the maximum fraction of the restart delay that is added to it as random jitter.
|
|
|
|
const jitter = 0.25
|
|
|
|
|
2016-02-02 23:08:07 +00:00
|
|
|
func newRestartTracker(policy *structs.RestartPolicy, jobType string) *RestartTracker {
|
|
|
|
onSuccess := true
|
|
|
|
if jobType == structs.JobTypeBatch {
|
|
|
|
onSuccess = false
|
|
|
|
}
|
2015-12-18 20:17:13 +00:00
|
|
|
return &RestartTracker{
|
|
|
|
startTime: time.Now(),
|
2016-02-02 23:08:07 +00:00
|
|
|
onSuccess: onSuccess,
|
2015-12-18 20:17:13 +00:00
|
|
|
policy: policy,
|
2015-12-17 18:37:53 +00:00
|
|
|
rand: rand.New(rand.NewSource(time.Now().Unix())),
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
type RestartTracker struct {
|
|
|
|
count int // Current number of attempts.
|
2016-02-02 23:08:07 +00:00
|
|
|
onSuccess bool // Whether to restart on successful exit code.
|
2015-12-18 20:17:13 +00:00
|
|
|
startTime time.Time // When the interval began
|
|
|
|
policy *structs.RestartPolicy
|
2015-12-17 18:37:53 +00:00
|
|
|
rand *rand.Rand
|
2015-11-14 06:07:13 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
func (r *RestartTracker) NextRestart(exitCode int) (bool, time.Duration) {
|
2016-02-02 22:17:39 +00:00
|
|
|
// Hot path if no attempts are expected
|
|
|
|
if r.policy.Attempts == 0 {
|
|
|
|
return false, 0
|
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// Check if we have entered a new interval.
|
|
|
|
end := r.startTime.Add(r.policy.Interval)
|
|
|
|
now := time.Now()
|
|
|
|
if now.After(end) {
|
|
|
|
r.count = 0
|
|
|
|
r.startTime = now
|
2016-01-20 20:00:20 +00:00
|
|
|
return r.shouldRestart(exitCode), r.jitter()
|
2015-12-18 20:17:13 +00:00
|
|
|
}
|
2015-11-05 19:12:31 +00:00
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
r.count++
|
2015-11-06 01:13:25 +00:00
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// If we are under the attempts, restart with delay.
|
|
|
|
if r.count <= r.policy.Attempts {
|
2015-12-17 18:37:53 +00:00
|
|
|
return r.shouldRestart(exitCode), r.jitter()
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// Don't restart since mode is "fail"
|
|
|
|
if r.policy.Mode == structs.RestartPolicyModeFail {
|
|
|
|
return false, 0
|
|
|
|
}
|
2015-11-05 19:12:31 +00:00
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// Apply an artifical wait to enter the next interval
|
|
|
|
return r.shouldRestart(exitCode), end.Sub(now)
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// shouldRestart returns whether a restart should occur based on the exit code
|
2016-02-02 23:08:07 +00:00
|
|
|
// and job type.
|
2015-12-18 20:17:13 +00:00
|
|
|
func (r *RestartTracker) shouldRestart(exitCode int) bool {
|
2016-02-02 23:08:07 +00:00
|
|
|
return exitCode != 0 || r.onSuccess
|
2015-12-17 18:37:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// jitter returns the delay time plus a jitter.
|
|
|
|
func (r *RestartTracker) jitter() time.Duration {
|
2016-01-20 20:00:20 +00:00
|
|
|
// Get the delay and ensure it is valid.
|
2015-12-17 18:37:53 +00:00
|
|
|
d := r.policy.Delay.Nanoseconds()
|
2016-01-20 20:00:20 +00:00
|
|
|
if d == 0 {
|
|
|
|
d = 1
|
|
|
|
}
|
|
|
|
|
2015-12-18 20:11:12 +00:00
|
|
|
j := float64(r.rand.Int63n(d)) * jitter
|
|
|
|
return time.Duration(d + int64(j))
|
2015-11-06 01:13:25 +00:00
|
|
|
}
|
|
|
|
|
2015-12-18 20:17:13 +00:00
|
|
|
// Returns a tracker that never restarts.
|
|
|
|
func noRestartsTracker() *RestartTracker {
|
|
|
|
policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
|
2016-02-02 23:08:07 +00:00
|
|
|
return newRestartTracker(policy, structs.JobTypeBatch)
|
2015-11-05 19:12:31 +00:00
|
|
|
}
|