Merge pull request #3621 from hashicorp/issue-3576-docker-check-flaps
Docker check flaps with "connection reset by peer"
This commit is contained in:
commit
993799496b
|
@ -1776,6 +1776,9 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
|
||||||
Logger: a.logger,
|
Logger: a.logger,
|
||||||
Client: a.dockerClient,
|
Client: a.dockerClient,
|
||||||
}
|
}
|
||||||
|
if prev := a.checkDockers[check.CheckID]; prev != nil {
|
||||||
|
prev.Stop()
|
||||||
|
}
|
||||||
dockerCheck.Start()
|
dockerCheck.Start()
|
||||||
a.checkDockers[check.CheckID] = dockerCheck
|
a.checkDockers[check.CheckID] = dockerCheck
|
||||||
|
|
||||||
|
|
|
@ -573,6 +573,7 @@ func (c *CheckDocker) Stop() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *CheckDocker) run() {
|
func (c *CheckDocker) run() {
|
||||||
|
defer c.Client.Close()
|
||||||
firstWait := lib.RandomStagger(c.Interval)
|
firstWait := lib.RandomStagger(c.Interval)
|
||||||
next := time.After(firstWait)
|
next := time.After(firstWait)
|
||||||
for {
|
for {
|
||||||
|
|
|
@ -54,6 +54,13 @@ func NewDockerClient(host string, maxbuf int64) (*DockerClient, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *DockerClient) Close() error {
|
||||||
|
if t, ok := c.client.Transport.(*http.Transport); ok {
|
||||||
|
t.CloseIdleConnections()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *DockerClient) Host() string {
|
func (c *DockerClient) Host() string {
|
||||||
return c.host
|
return c.host
|
||||||
}
|
}
|
||||||
|
@ -151,11 +158,17 @@ func (c *DockerClient) CreateExec(containerID string, cmd []string) (string, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, error) {
|
func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, error) {
|
||||||
data := struct{ Detach, Tty bool }{Detach: false, Tty: true}
|
data := struct{ Detach, Tty bool }{Detach: false, Tty: false}
|
||||||
uri := fmt.Sprintf("/exec/%s/start", execID)
|
uri := fmt.Sprintf("/exec/%s/start", execID)
|
||||||
b, code, err := c.call("POST", uri, data)
|
b, code, err := c.call("POST", uri, data)
|
||||||
switch {
|
switch {
|
||||||
case err != nil:
|
// todo(fs): https://github.com/hashicorp/consul/pull/3621
|
||||||
|
// todo(fs): for some reason the docker agent closes the connection during the
|
||||||
|
// todo(fs): io.Copy call in c.call which causes a "connection reset by peer" error
|
||||||
|
// todo(fs): even though both body and status code have been received. My current theory is
|
||||||
|
// todo(fs): that the docker agent closes this prematurely but I don't understand why.
|
||||||
|
// todo(fs): the code below ignores this error.
|
||||||
|
case err != nil && !strings.Contains(err.Error(), "connection reset by peer"):
|
||||||
return nil, fmt.Errorf("start exec failed for container %s: %s", containerID, err)
|
return nil, fmt.Errorf("start exec failed for container %s: %s", containerID, err)
|
||||||
case code == 200:
|
case code == 200:
|
||||||
return b, nil
|
return b, nil
|
||||||
|
@ -164,7 +177,7 @@ func (c *DockerClient) StartExec(containerID, execID string) (*circbuf.Buffer, e
|
||||||
case code == 409:
|
case code == 409:
|
||||||
return nil, fmt.Errorf("start exec failed since container %s is paused or stopped", containerID)
|
return nil, fmt.Errorf("start exec failed since container %s is paused or stopped", containerID)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("start exec failed for container %s with status %d: %s", containerID, code, b)
|
return nil, fmt.Errorf("start exec failed for container %s with status %d: body: %s err: %s", containerID, code, b, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue