Script checks: expose a per-check timeout and record how long a check ran.

- The Check interface gains Timeout(); DockerScriptCheck and ExecScriptCheck
  each store a timeout field and return it from Timeout().
- createCheck copies check.Timeout into both check types.
- ExecScriptCheck.Run waits for e.timeout, falling back to
  defaultCheckTimeout (30s) when e.timeout is zero. Its timeout error now
  reports the wait that was actually used instead of a fixed "30s".
- CheckResult gains Duration; Run sets it on the result that carries the
  script's exit code.
- runCheck logs at DEBUG when res.Duration >= check.Timeout().

NOTE(review): Timeout() returns the raw field, so when check.Timeout is zero
the runCheck comparison is true for every non-negative duration -- confirm
whether that is intended.
NOTE(review): no hunk adds Duration to the result built with ExitCode: 0;
the rest of that literal is outside these hunks -- confirm.
NOTE(review): whitespace in the hunks was reconstructed; if context fails to
match, apply with `git apply --ignore-whitespace`.

diff --git a/client/consul/check.go b/client/consul/check.go
index f068863ee..052c5c78c 100644
--- a/client/consul/check.go
+++ b/client/consul/check.go
@@ -80,6 +80,7 @@ type Check interface {
 	Run() *cstructs.CheckResult
 	ID() string
 	Interval() time.Duration
+	Timeout() time.Duration
 }
 
 // Returns a random stagger interval between 0 and the duration
diff --git a/client/consul/sync.go b/client/consul/sync.go
index 41cf85478..75d9520d5 100644
--- a/client/consul/sync.go
+++ b/client/consul/sync.go
@@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool {
 // runCheck runs a check and updates the corresponding ttl check in consul
 func (c *ConsulService) runCheck(check Check) {
 	res := check.Run()
+	if res.Duration >= check.Timeout() {
+		c.logger.Printf("[DEBUG] check took time: %v, timeout: %v", res.Duration, check.Timeout())
+	}
 	state := consul.HealthCritical
 	output := res.Output
 	switch res.ExitCode {
diff --git a/client/driver/executor/checks.go b/client/driver/executor/checks.go
index 0d7e6eb51..70cdd7c26 100644
--- a/client/driver/executor/checks.go
+++ b/client/driver/executor/checks.go
@@ -20,11 +20,16 @@ var (
 	client *docker.Client
 )
 
+const (
+	defaultCheckTimeout = 30 * time.Second
+)
+
 // DockerScriptCheck runs nagios compatible scripts in a docker container and
 // provides the check result
 type DockerScriptCheck struct {
 	id          string
 	interval    time.Duration
+	timeout     time.Duration
 	containerID string
 	logger      *log.Logger
 	cmd         string
@@ -117,10 +122,16 @@ func (d *DockerScriptCheck) Interval() time.Duration {
 	return d.interval
 }
 
+// Timeout returns the duration after which a check is timed out.
+func (d *DockerScriptCheck) Timeout() time.Duration {
+	return d.timeout
+}
+
 // ExecScriptCheck runs a nagios compatible script and returns the check result
 type ExecScriptCheck struct {
 	id       string
 	interval time.Duration
+	timeout  time.Duration
 	cmd      string
 	args     []string
 	taskDir  string
@@ -143,9 +154,14 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
 	go func() {
 		errCh <- cmd.Wait()
 	}()
+	timeout := defaultCheckTimeout
+	if e.timeout != 0 {
+		timeout = e.timeout
+	}
 	for {
 		select {
 		case err := <-errCh:
+			endTime := time.Now()
 			if err == nil {
 				return &cstructs.CheckResult{
 					ExitCode:  0,
@@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
 				ExitCode:  exitCode,
 				Output:    string(buf.Bytes()),
 				Timestamp: ts,
+				Duration:  endTime.Sub(ts),
 			}
-		case <-time.After(30 * time.Second):
-			errCh <- fmt.Errorf("timed out after waiting 30s")
+		case <-time.After(timeout):
+			errCh <- fmt.Errorf("timed out after waiting %v", timeout)
 		}
 	}
@@ -180,3 +197,8 @@ func (e *ExecScriptCheck) ID() string {
 func (e *ExecScriptCheck) Interval() time.Duration {
 	return e.interval
 }
+
+// Timeout returns the duration after which a check is timed out.
+func (e *ExecScriptCheck) Timeout() time.Duration {
+	return e.timeout
+}
diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go
index 3f0cb3169..ce36c5e4e 100644
--- a/client/driver/executor/executor.go
+++ b/client/driver/executor/executor.go
@@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
 		return &DockerScriptCheck{
 			id:          checkID,
 			interval:    check.Interval,
+			timeout:     check.Timeout,
 			containerID: e.consulCtx.ContainerID,
 			logger:      e.logger,
 			cmd:         check.Command,
@@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
 		return &ExecScriptCheck{
 			id:       checkID,
 			interval: check.Interval,
+			timeout:  check.Timeout,
 			cmd:      check.Command,
 			args:     check.Args,
 			taskDir:  e.taskDir,
diff --git a/client/driver/structs/structs.go b/client/driver/structs/structs.go
index ecc738e76..59bf3b195 100644
--- a/client/driver/structs/structs.go
+++ b/client/driver/structs/structs.go
@@ -71,5 +71,6 @@ type CheckResult struct {
 	ExitCode  int
 	Output    string
 	Timestamp time.Time
+	Duration  time.Duration
 	Err       error
 }