Merge pull request #1151 from hashicorp/check-debug-logs
Added logs to indicate when checks time out
This commit is contained in:
commit
bc748de58a
|
@ -80,6 +80,7 @@ type Check interface {
|
|||
Run() *cstructs.CheckResult
|
||||
ID() string
|
||||
Interval() time.Duration
|
||||
Timeout() time.Duration
|
||||
}
|
||||
|
||||
// Returns a random stagger interval between 0 and the duration
|
||||
|
|
|
@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool {
|
|||
// runCheck runs a check and updates the corresponding ttl check in consul
|
||||
func (c *ConsulService) runCheck(check Check) {
|
||||
res := check.Run()
|
||||
if res.Duration >= check.Timeout() {
|
||||
c.logger.Printf("[DEBUG] consul.sync: check took time: %v, timeout: %v", res.Duration, check.Timeout())
|
||||
}
|
||||
state := consul.HealthCritical
|
||||
output := res.Output
|
||||
switch res.ExitCode {
|
||||
|
@ -445,7 +448,7 @@ func (c *ConsulService) runCheck(check Check) {
|
|||
}
|
||||
if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil {
|
||||
if c.availble {
|
||||
c.logger.Printf("[DEBUG] error updating ttl check for check %q: %v", check.ID(), err)
|
||||
c.logger.Printf("[DEBUG] consul.sync: error updating ttl check for check %q: %v", check.ID(), err)
|
||||
c.availble = false
|
||||
} else {
|
||||
c.availble = true
|
||||
|
|
|
@ -20,20 +20,26 @@ var (
|
|||
client *docker.Client
|
||||
)
|
||||
|
||||
// defaultCheckTimeout is the default check timeout.
const defaultCheckTimeout = time.Second * 30
|
||||
|
||||
// DockerScriptCheck runs nagios compatible scripts in a docker container and
// provides the check result.
//
// Each field is declared exactly once; Go requires non-blank field names
// within a struct to be unique.
type DockerScriptCheck struct {
	id          string        // id of the check
	interval    time.Duration // interval of the check
	timeout     time.Duration // timeout of the check
	containerID string        // container id in which the check will be invoked
	logger      *log.Logger   // logger for the check
	cmd         string        // check command
	args        []string      // check command arguments

	dockerEndpoint string // docker endpoint
	tlsCert        string // path to tls certificate
	tlsCa          string // path to tls ca
	tlsKey         string // path to tls key
}
|
||||
|
||||
// dockerClient creates the client to interact with the docker daemon
|
||||
|
@ -117,15 +123,24 @@ func (d *DockerScriptCheck) Interval() time.Duration {
|
|||
return d.interval
|
||||
}
|
||||
|
||||
// Timeout returns the duration after which a check is timed out.
|
||||
func (d *DockerScriptCheck) Timeout() time.Duration {
|
||||
if d.timeout == 0 {
|
||||
return defaultCheckTimeout
|
||||
}
|
||||
return d.timeout
|
||||
}
|
||||
|
||||
// ExecScriptCheck runs a nagios compatible script and returns the check
// result.
//
// Each field is declared exactly once; Go requires non-blank field names
// within a struct to be unique.
type ExecScriptCheck struct {
	id       string        // id of the script check
	interval time.Duration // interval at which the check is invoked
	timeout  time.Duration // timeout duration of the check
	cmd      string        // command of the check
	args     []string      // args passed to the check
	taskDir  string        // the root directory of the check

	FSIsolation bool // indicates whether the check has to be run within a chroot
}
|
||||
|
||||
// Run runs an exec script check
|
||||
|
@ -146,6 +161,7 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
|
|||
for {
|
||||
select {
|
||||
case err := <-errCh:
|
||||
endTime := time.Now()
|
||||
if err == nil {
|
||||
return &cstructs.CheckResult{
|
||||
ExitCode: 0,
|
||||
|
@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
|
|||
ExitCode: exitCode,
|
||||
Output: string(buf.Bytes()),
|
||||
Timestamp: ts,
|
||||
Duration: endTime.Sub(ts),
|
||||
}
|
||||
case <-time.After(30 * time.Second):
|
||||
case <-time.After(e.Timeout()):
|
||||
errCh <- fmt.Errorf("timed out after waiting 30s")
|
||||
}
|
||||
}
|
||||
|
@ -180,3 +197,11 @@ func (e *ExecScriptCheck) ID() string {
|
|||
func (e *ExecScriptCheck) Interval() time.Duration {
|
||||
return e.interval
|
||||
}
|
||||
|
||||
// Timeout returns the duration after which a check is timed out.
|
||||
func (e *ExecScriptCheck) Timeout() time.Duration {
|
||||
if e.timeout == 0 {
|
||||
return defaultCheckTimeout
|
||||
}
|
||||
return e.timeout
|
||||
}
|
||||
|
|
|
@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
|
|||
return &DockerScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
containerID: e.consulCtx.ContainerID,
|
||||
logger: e.logger,
|
||||
cmd: check.Command,
|
||||
|
@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
|
|||
return &ExecScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
cmd: check.Command,
|
||||
args: check.Args,
|
||||
taskDir: e.taskDir,
|
||||
|
|
|
@ -68,8 +68,19 @@ func (r *RecoverableError) Error() string {
|
|||
|
||||
// CheckResult encapsulates the result of a check.
//
// Each field is declared exactly once; Go requires non-blank field names
// within a struct to be unique.
type CheckResult struct {

	// ExitCode is the exit code of the check
	ExitCode int

	// Output is the output of the check script
	Output string

	// Timestamp is the time at which the check was executed
	Timestamp time.Time

	// Duration is the time it took the check to run
	Duration time.Duration

	// Err is the error that a check returned
	Err error
}
|
||||
|
|
Loading…
Reference in New Issue