retry transient docker errors within function
parent e8d73e3d72
commit ac329a5e07
@@ -41,6 +41,12 @@ var (
 	// running operations such as waiting on containers and collect stats
 	waitClient *docker.Client
 
+	dockerTransientErrs = []string{
+		"Client.Timeout exceeded while awaiting headers",
+		"EOF",
+		"API error (500)",
+	}
+
 	// recoverableErrTimeouts returns a recoverable error if the error was due
 	// to timeouts
 	recoverableErrTimeouts = func(err error) error {
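For context, these three entries are matched as plain substrings against err.Error() by isDockerTransientError, added at the end of this diff. A minimal standalone sketch of the kinds of stringified client errors they are meant to catch; the example messages below are illustrative assumptions, not strings taken from the driver or from the Docker client library:

package main

import (
	"errors"
	"fmt"
	"strings"
)

func main() {
	// Assumed example messages, for illustration only.
	timeoutErr := errors.New("request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)")
	apiErr := errors.New("API error (500): Internal Server Error")

	// An error is treated as transient when its message contains one of the listed substrings.
	fmt.Println(strings.Contains(timeoutErr.Error(), "Client.Timeout exceeded while awaiting headers")) // true
	fmt.Println(strings.Contains(apiErr.Error(), "API error (500)"))                                     // true
}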
@@ -456,6 +462,10 @@ CREATE:
 		// There is still a very small chance this is possible even with the
 		// coordinator so retry.
 		return nil, nstructs.NewRecoverableError(createErr, true)
+	} else if isDockerTransientError(createErr) && attempted < 5 {
+		attempted++
+		time.Sleep(1 * time.Second)
+		goto CREATE
 	}
 
 	return nil, recoverableErrTimeouts(createErr)
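The CREATE path now shares the same bounded retry shape already used for container start: count attempts, sleep briefly, and jump back to the labelled block. A self-contained sketch of that pattern, where doWork and isTransient are hypothetical stand-ins and only the attempt limit of 5 and the one-second pause mirror the diff:

package main

import (
	"errors"
	"fmt"
	"strings"
	"time"
)

// doWork and isTransient are hypothetical stand-ins, used only to
// demonstrate the bounded, label-based retry from the diff.
func doWork(attempt int) error {
	if attempt < 2 {
		return errors.New("API error (500): temporarily unavailable")
	}
	return nil
}

func isTransient(err error) bool {
	return err != nil && strings.Contains(err.Error(), "API error (500)")
}

func main() {
	attempted := 0
CREATE:
	if err := doWork(attempted); err != nil {
		if isTransient(err) && attempted < 5 {
			attempted++
			time.Sleep(1 * time.Second)
			goto CREATE
		}
		fmt.Println("giving up after", attempted, "retries:", err)
		return
	}
	fmt.Println("succeeded after", attempted, "retries")
}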
@@ -474,8 +484,7 @@ START:
 		d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr)
 
-		// If it is a 500 error it is likely we can retry and be successful
-		if strings.Contains(startErr.Error(), "API error (500)") {
+		if isDockerTransientError(startErr) {
 			if attempted < 5 {
 				attempted++
 				time.Sleep(1 * time.Second)
@@ -1458,3 +1467,18 @@ func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) {
 func (d *Driver) Shutdown() {
 	d.signalShutdown()
 }
+
+func isDockerTransientError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	errMsg := err.Error()
+	for _, te := range dockerTransientErrs {
+		if strings.Contains(errMsg, te) {
+			return true
+		}
+	}
+
+	return false
+}
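A sketch of how the new helper could be exercised in a table-driven test; the test itself is not part of this commit, the case names and sample messages are assumptions, and only the three substrings in dockerTransientErrs come from the diff (imports assumed: errors, testing):

func TestIsDockerTransientError(t *testing.T) {
	cases := []struct {
		name      string
		err       error
		transient bool
	}{
		{"nil error", nil, false},
		{"client timeout", errors.New("Client.Timeout exceeded while awaiting headers"), true},
		{"eof", errors.New("EOF"), true},
		{"server error", errors.New("API error (500): internal error"), true},
		{"client error", errors.New("API error (404): no such container"), false},
	}

	for _, c := range cases {
		if got := isDockerTransientError(c.err); got != c.transient {
			t.Errorf("%s: isDockerTransientError() = %v, want %v", c.name, got, c.transient)
		}
	}
}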