90 lines
2.1 KiB
Go
90 lines
2.1 KiB
Go
package client
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// restartTracker decides what happens after a task's process exits. Each
// implementation keeps track of how many times the process has exited and,
// from that, reports whether the task should be restarted and the duration to
// wait before restarting it.
//
// Batch trackers carry no interval: their tasks are restarted at most
// maxAttempts times.
type restartTracker interface {
	// nextRestart is given the exit code of the process that just exited and
	// returns whether to restart the task and how long to wait before doing so.
	nextRestart(exitCode int) (restart bool, wait time.Duration)
}
|
|
|
|
func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker {
|
|
switch jobType {
|
|
case structs.JobTypeService:
|
|
return &serviceRestartTracker{
|
|
maxAttempts: restartPolicy.Attempts,
|
|
startTime: time.Now(),
|
|
interval: restartPolicy.Interval,
|
|
delay: restartPolicy.Delay,
|
|
}
|
|
default:
|
|
return &batchRestartTracker{
|
|
maxAttempts: restartPolicy.Attempts,
|
|
delay: restartPolicy.Delay,
|
|
}
|
|
}
|
|
}
|
|
|
|
// noRestartsTracker returns a RestartTracker that never restarts.
|
|
func noRestartsTracker() restartTracker {
|
|
return &batchRestartTracker{maxAttempts: 0}
|
|
}
|
|
|
|
// batchRestartTracker allows a bounded number of restarts, each after a fixed
// delay.
type batchRestartTracker struct {
	// maxAttempts is the number of restarts that may be granted.
	maxAttempts int
	// delay is the wait reported with every granted restart.
	delay time.Duration

	// count is the number of restarts granted so far.
	count int
}

// increment records one more granted restart.
func (b *batchRestartTracker) increment() {
	b.count++
}

// nextRestart grants a restart, returning (true, delay), only when exitCode
// is greater than zero and fewer than maxAttempts restarts have been granted
// so far. In every other case it returns (false, 0) and leaves the count
// untouched.
func (b *batchRestartTracker) nextRestart(exitCode int) (bool, time.Duration) {
	if exitCode <= 0 || b.count >= b.maxAttempts {
		return false, 0
	}

	b.increment()
	return true, b.delay
}
|
|
|
|
// serviceRestartTracker always allows a restart and only varies how long to
// wait: the fixed delay while attempts remain in the current window, or the
// time left in the window once they are used up.
type serviceRestartTracker struct {
	// maxAttempts is the number of restarts per window that get the plain delay.
	maxAttempts int
	// delay is the wait reported while attempts remain, or when a new window
	// begins.
	delay time.Duration
	// interval is the length of a restart window, measured from startTime.
	interval time.Duration

	// count is the number of nextRestart calls made in the current window.
	count int
	// startTime is when the current window began.
	startTime time.Time
}

// increment records one more nextRestart call in the current window.
func (s *serviceRestartTracker) increment() {
	s.count++
}

// nextRestart always reports true; exitCode is not consulted. The returned
// wait is:
//
//   - delay, if the current window has already ended. A new window is started
//     and this call counts as its first attempt.
//   - delay, if the window is still open and fewer than maxAttempts calls have
//     been made in it.
//   - the time remaining until the window ends, otherwise.
func (s *serviceRestartTracker) nextRestart(exitCode int) (bool, time.Duration) {
	windowEnd := s.startTime.Add(s.interval)
	now := time.Now()

	wait := s.delay
	switch {
	case now.After(windowEnd):
		// The previous window is over: open a fresh one starting now.
		s.count = 0
		s.startTime = time.Now()
	case s.count >= s.maxAttempts:
		// Attempts are exhausted inside the window: hold off until it ends.
		wait = windowEnd.Sub(now)
	}

	// Every call is counted, including the one that opened a new window.
	s.increment()
	return true, wait
}
|