Added logs to indicate when checks time out
This commit is contained in:
parent
61aedccce7
commit
9a8ad773b3
|
@ -80,6 +80,7 @@ type Check interface {
|
|||
Run() *cstructs.CheckResult
|
||||
ID() string
|
||||
Interval() time.Duration
|
||||
Timeout() time.Duration
|
||||
}
|
||||
|
||||
// Returns a random stagger interval between 0 and the duration
|
||||
|
|
|
@ -429,6 +429,9 @@ func (c *ConsulService) consulPresent() bool {
|
|||
// runCheck runs a check and updates the corresponding TTL check in Consul
|
||||
func (c *ConsulService) runCheck(check Check) {
|
||||
res := check.Run()
|
||||
if res.Duration >= check.Timeout() {
|
||||
c.logger.Printf("[DEBUG] check took time: %v, timeout: %v", res.Duration, check.Timeout())
|
||||
}
|
||||
state := consul.HealthCritical
|
||||
output := res.Output
|
||||
switch res.ExitCode {
|
||||
|
|
|
@ -20,11 +20,16 @@ var (
|
|||
client *docker.Client
|
||||
)
|
||||
|
||||
const (
|
||||
// defaultCheckTimeout is the timeout ExecScriptCheck.Run falls back to when
// the check's own timeout is zero.
defaultCheckTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
// DockerScriptCheck runs nagios compatible scripts in a docker container and
|
||||
// provides the check result
|
||||
type DockerScriptCheck struct {
|
||||
id string
|
||||
interval time.Duration
|
||||
timeout time.Duration
|
||||
containerID string
|
||||
logger *log.Logger
|
||||
cmd string
|
||||
|
@ -117,10 +122,16 @@ func (d *DockerScriptCheck) Interval() time.Duration {
|
|||
return d.interval
|
||||
}
|
||||
|
||||
// Timeout returns the duration after which this check is considered timed
// out, exactly as stored in the check's timeout field.
// NOTE(review): whether a zero value is replaced by a default elsewhere is
// not visible here — confirm against DockerScriptCheck.Run.
|
||||
func (d *DockerScriptCheck) Timeout() time.Duration {
|
||||
return d.timeout
|
||||
}
|
||||
|
||||
// ExecScriptCheck runs a nagios compatible script and returns the check result
|
||||
type ExecScriptCheck struct {
|
||||
id string
|
||||
interval time.Duration
|
||||
timeout time.Duration
|
||||
cmd string
|
||||
args []string
|
||||
taskDir string
|
||||
|
@ -143,9 +154,14 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
|
|||
go func() {
|
||||
errCh <- cmd.Wait()
|
||||
}()
|
||||
timeout := defaultCheckTimeout
|
||||
if e.timeout != 0 {
|
||||
timeout = e.timeout
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case err := <-errCh:
|
||||
endTime := time.Now()
|
||||
if err == nil {
|
||||
return &cstructs.CheckResult{
|
||||
ExitCode: 0,
|
||||
|
@ -163,8 +179,9 @@ func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
|
|||
ExitCode: exitCode,
|
||||
Output: string(buf.Bytes()),
|
||||
Timestamp: ts,
|
||||
Duration: endTime.Sub(ts),
|
||||
}
|
||||
case <-time.After(30 * time.Second):
|
||||
case <-time.After(timeout):
|
||||
errCh <- fmt.Errorf("timed out after waiting 30s")
|
||||
}
|
||||
}
|
||||
|
@ -180,3 +197,8 @@ func (e *ExecScriptCheck) ID() string {
|
|||
// Interval returns the interval stored on this check.
func (e *ExecScriptCheck) Interval() time.Duration {
|
||||
return e.interval
|
||||
}
|
||||
|
||||
// Timeout returns the timeout configured on this check, unmodified. The value
// may be zero; Run substitutes defaultCheckTimeout in that case, so the
// duration returned here can differ from the timeout Run actually enforces.
// NOTE(review): runCheck compares res.Duration >= check.Timeout(), which is
// always true for a zero timeout — consider returning the effective timeout.
|
||||
func (e *ExecScriptCheck) Timeout() time.Duration {
|
||||
return e.timeout
|
||||
}
|
||||
|
|
|
@ -566,6 +566,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
|
|||
return &DockerScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
containerID: e.consulCtx.ContainerID,
|
||||
logger: e.logger,
|
||||
cmd: check.Command,
|
||||
|
@ -577,6 +578,7 @@ func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID str
|
|||
return &ExecScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
cmd: check.Command,
|
||||
args: check.Args,
|
||||
taskDir: e.taskDir,
|
||||
|
|
|
@ -71,5 +71,6 @@ type CheckResult struct {
|
|||
ExitCode int
|
||||
Output string
|
||||
Timestamp time.Time
|
||||
Duration time.Duration
|
||||
Err error
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue