Added logs to indicate when checks time out

This commit is contained in:
Diptanu Choudhury 2016-05-05 10:01:38 -07:00
parent 61aedccce7
commit 9a8ad773b3
5 changed files with 30 additions and 1 deletions

View File

@ -80,6 +80,7 @@ type Check interface {
Run() *cstructs.CheckResult
ID() string
Interval() time.Duration
Timeout() time.Duration
}
// Returns a random stagger interval between 0 and the duration

View File

@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool {
// runCheck runs a check and updates the corresponding ttl check in consul
func (c *ConsulService) runCheck(check Check) {
res := check.Run()
if res.Duration >= check.Timeout() {
c.logger.Printf("[DEBUG] check took time: %v, timeout: %v", res.Duration, check.Timeout())
}
state := consul.HealthCritical
output := res.Output
switch res.ExitCode {

View File

@ -20,11 +20,16 @@ var (
client *docker.Client
)
// defaultCheckTimeout is the timeout applied to a script check when none
// has been configured for it.
const defaultCheckTimeout = 30 * time.Second
// DockerScriptCheck runs nagios compatible scripts in a docker container and
// provides the check result
type DockerScriptCheck struct {
id string
interval time.Duration
timeout time.Duration
containerID string
logger *log.Logger
cmd string
@ -117,10 +122,16 @@ func (d *DockerScriptCheck) Interval() time.Duration {
return d.interval
}
// Timeout returns the duration after which a check is timed out.
//
// When no timeout has been configured (the field is zero) it returns
// defaultCheckTimeout instead of zero. Callers that compare a run's duration
// against this value would otherwise treat every run as having timed out,
// because any duration is >= 0.
//
// NOTE(review): confirm that DockerScriptCheck.Run applies the same default
// when d.timeout is zero, so the reported and enforced timeouts agree.
func (d *DockerScriptCheck) Timeout() time.Duration {
	if d.timeout == 0 {
		return defaultCheckTimeout
	}
	return d.timeout
}
// ExecScriptCheck runs a nagios compatible script and returns the check result
type ExecScriptCheck struct {
id string
interval time.Duration
timeout time.Duration
cmd string
args []string
taskDir string
@ -143,9 +154,14 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
go func() {
errCh <- cmd.Wait()
}()
timeout := defaultCheckTimeout
if e.timeout != 0 {
timeout = e.timeout
}
for {
select {
case err := <-errCh:
endTime := time.Now()
if err == nil {
return &cstructs.CheckResult{
ExitCode: 0,
@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
ExitCode: exitCode,
Output: string(buf.Bytes()),
Timestamp: ts,
Duration: endTime.Sub(ts),
}
case <-time.After(30 * time.Second):
case <-time.After(timeout):
errCh <- fmt.Errorf("timed out after waiting 30s")
}
}
@ -180,3 +197,8 @@ func (e *ExecScriptCheck) ID() string {
// Interval returns the interval configured for this check.
func (e *ExecScriptCheck) Interval() time.Duration {
	every := e.interval
	return every
}
// Timeout returns the duration after which a check is timed out.
//
// This is the effective timeout: when none has been configured (the field is
// zero) it returns defaultCheckTimeout, matching the fallback Run uses when
// waiting for the script. Returning the raw zero value would make any
// "duration >= timeout" comparison succeed for every run.
func (e *ExecScriptCheck) Timeout() time.Duration {
	if e.timeout == 0 {
		return defaultCheckTimeout
	}
	return e.timeout
}

View File

@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
return &DockerScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
containerID: e.consulCtx.ContainerID,
logger: e.logger,
cmd: check.Command,
@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
return &ExecScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
cmd: check.Command,
args: check.Args,
taskDir: e.taskDir,

View File

@ -71,5 +71,6 @@ type CheckResult struct {
ExitCode int
Output string
Timestamp time.Time
Duration time.Duration
Err error
}