Merge pull request #1151 from hashicorp/check-debug-logs

Added logs to indicate when checks time out
This commit is contained in:
Diptanu Choudhury 2016-05-05 16:52:04 -07:00
commit bc748de58a
5 changed files with 62 additions and 20 deletions

View File

@ -80,6 +80,7 @@ type Check interface {
Run() *cstructs.CheckResult
ID() string
Interval() time.Duration
Timeout() time.Duration
}
// Returns a random stagger interval between 0 and the duration

View File

@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool {
// runCheck runs a check and updates the corresponding ttl check in consul
func (c *ConsulService) runCheck(check Check) {
res := check.Run()
if res.Duration >= check.Timeout() {
c.logger.Printf("[DEBUG] consul.sync: check took time: %v, timeout: %v", res.Duration, check.Timeout())
}
state := consul.HealthCritical
output := res.Output
switch res.ExitCode {
@ -445,7 +448,7 @@ func (c *ConsulService) runCheck(check Check) {
}
if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil {
if c.availble {
c.logger.Printf("[DEBUG] error updating ttl check for check %q: %v", check.ID(), err)
c.logger.Printf("[DEBUG] consul.sync: error updating ttl check for check %q: %v", check.ID(), err)
c.availble = false
} else {
c.availble = true

View File

@ -20,20 +20,26 @@ var (
client *docker.Client
)
// defaultCheckTimeout is the timeout applied to a check that does not
// specify one of its own.
const defaultCheckTimeout = 30 * time.Second
// DockerScriptCheck runs nagios compatible scripts in a docker container and
// provides the check result
type DockerScriptCheck struct {
id string
interval time.Duration
containerID string
id string // id of the check
interval time.Duration // interval of the check
timeout time.Duration // timeout of the check
containerID string // container id in which the check will be invoked
logger *log.Logger
cmd string
args []string
cmd string // check command
args []string // check command arguments
dockerEndpoint string
tlsCert string
tlsCa string
tlsKey string
dockerEndpoint string // docker endpoint
tlsCert string // path to tls certificate
tlsCa string // path to tls ca
tlsKey string // path to tls key
}
// dockerClient creates the client to interact with the docker daemon
@ -117,15 +123,24 @@ func (d *DockerScriptCheck) Interval() time.Duration {
return d.interval
}
// Timeout reports how long the check may run before it is considered timed
// out. A zero timeout field means none was configured, in which case
// defaultCheckTimeout is returned instead.
func (d *DockerScriptCheck) Timeout() time.Duration {
	if d.timeout != 0 {
		return d.timeout
	}
	return defaultCheckTimeout
}
// ExecScriptCheck runs a nagios compatible script and returns the check result
type ExecScriptCheck struct {
id string
interval time.Duration
cmd string
args []string
taskDir string
id string // id of the script check
interval time.Duration // interval at which the check is invoked
timeout time.Duration // timeout duration of the check
cmd string // command of the check
args []string // args passed to the check
taskDir string // the root directory of the check
FSIsolation bool
FSIsolation bool // indicates whether the check has to be run within a chroot
}
// Run runs an exec script check
@ -146,6 +161,7 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
for {
select {
case err := <-errCh:
endTime := time.Now()
if err == nil {
return &cstructs.CheckResult{
ExitCode: 0,
@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
ExitCode: exitCode,
Output: string(buf.Bytes()),
Timestamp: ts,
Duration: endTime.Sub(ts),
}
case <-time.After(30 * time.Second):
case <-time.After(e.Timeout()):
errCh <- fmt.Errorf("timed out after waiting 30s")
}
}
@ -180,3 +197,11 @@ func (e *ExecScriptCheck) ID() string {
func (e *ExecScriptCheck) Interval() time.Duration {
return e.interval
}
// Timeout reports how long the script check may run before it is considered
// timed out. A zero timeout field means none was configured, in which case
// defaultCheckTimeout is returned instead.
func (e *ExecScriptCheck) Timeout() time.Duration {
	if e.timeout != 0 {
		return e.timeout
	}
	return defaultCheckTimeout
}

View File

@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
return &DockerScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
containerID: e.consulCtx.ContainerID,
logger: e.logger,
cmd: check.Command,
@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
return &ExecScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
cmd: check.Command,
args: check.Args,
taskDir: e.taskDir,

View File

@ -68,8 +68,19 @@ func (r *RecoverableError) Error() string {
// CheckResult encapsulates the result of a check
type CheckResult struct {
ExitCode int
Output string
// ExitCode is the exit code of the check
ExitCode int
// Output is the output of the check script
Output string
// Timestamp is the time at which the check was executed
Timestamp time.Time
Err error
// Duration is the time it took the check to run
Duration time.Duration
// Err is the error that a check returned
Err error
}