Merge pull request #3621 from hashicorp/issue-3576-docker-check-flaps

Docker check flaps with "connection reset by peer"
This commit is contained in:
Frank Schröder 2017-10-26 19:53:57 +02:00 committed by GitHub
commit 993799496b
3 changed files with 20 additions and 3 deletions

View file

@ -1776,6 +1776,9 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
Logger: a.logger, Logger: a.logger,
Client: a.dockerClient, Client: a.dockerClient,
} }
if prev := a.checkDockers[check.CheckID]; prev != nil {
prev.Stop()
}
dockerCheck.Start() dockerCheck.Start()
a.checkDockers[check.CheckID] = dockerCheck a.checkDockers[check.CheckID] = dockerCheck

View file

@ -573,6 +573,7 @@ func (c *CheckDocker) Stop() {
} }
func (c *CheckDocker) run() { func (c *CheckDocker) run() {
defer c.Client.Close()
firstWait := lib.RandomStagger(c.Interval) firstWait := lib.RandomStagger(c.Interval)
next := time.After(firstWait) next := time.After(firstWait)
for { for {

View file

@ -54,6 +54,13 @@ func NewDockerClient(host string, maxbuf int64) (*DockerClient, error) {
}, nil }, nil
} }
// Close closes any idle connections held by the client's transport when that
// transport is an *http.Transport; for any other transport type it does
// nothing. It always returns nil.
func (c *DockerClient) Close() error {
	transport, isHTTPTransport := c.client.Transport.(*http.Transport)
	if !isHTTPTransport {
		// Nothing to release for transports we do not recognize.
		return nil
	}
	transport.CloseIdleConnections()
	return nil
}
func (c *DockerClient) Host() string { func (c *DockerClient) Host() string {
return c.host return c.host
} }
@ -151,11 +158,17 @@ func (c *DockerClient) CreateExec(containerID string, cmd []string) (string, err
} }
func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, error) { func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, error) {
data := struct{ Detach, Tty bool }{Detach: false, Tty: true} data := struct{ Detach, Tty bool }{Detach: false, Tty: false}
uri := fmt.Sprintf("/exec/%s/start", execID) uri := fmt.Sprintf("/exec/%s/start", execID)
b, code, err := c.call("POST", uri, data) b, code, err := c.call("POST", uri, data)
switch { switch {
case err != nil: // todo(fs): https://github.com/hashicorp/consul/pull/3621
// todo(fs): for some reason the docker agent closes the connection during the
// todo(fs): io.Copy call in c.call which causes a "connection reset by peer" error
// todo(fs): even though both body and status code have been received. My current theory is
// todo(fs): that the docker agent closes this prematurely but I don't understand why.
// todo(fs): the code below ignores this error.
case err != nil && !strings.Contains(err.Error(), "connection reset by peer"):
return nil, fmt.Errorf("start exec failed for container %s: %s", containerID, err) return nil, fmt.Errorf("start exec failed for container %s: %s", containerID, err)
case code == 200: case code == 200:
return b, nil return b, nil
@ -164,7 +177,7 @@ func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, e
case code == 409: case code == 409:
return nil, fmt.Errorf("start exec failed since container %s is paused or stopped", containerID) return nil, fmt.Errorf("start exec failed since container %s is paused or stopped", containerID)
default: default:
return nil, fmt.Errorf("start exec failed for container %s with status %d: %s", containerID, code, b) return nil, fmt.Errorf("start exec failed for container %s with status %d: body: %s err: %s", containerID, code, b, err)
} }
} }