Merge pull request #1762 from mshean/script-timeout

Add Timeout field to CheckMonitor
This commit is contained in:
James Phillips 2016-04-24 23:08:06 -07:00
commit 03b0c196e0
4 changed files with 42 additions and 2 deletions

View File

@ -974,6 +974,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType, persist
CheckID: check.CheckID,
Script: chkType.Script,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
ReapLock: &a.reapLock,
}

View File

@ -101,6 +101,7 @@ type CheckMonitor struct {
CheckID string
Script string
Interval time.Duration
Timeout time.Duration
Logger *log.Logger
ReapLock *sync.RWMutex
@ -180,7 +181,11 @@ func (c *CheckMonitor) check() {
errCh <- cmd.Wait()
}()
go func() {
time.Sleep(30 * time.Second)
if c.Timeout > 0 {
time.Sleep(c.Timeout)
} else {
time.Sleep(30 * time.Second)
}
errCh <- fmt.Errorf("Timed out running check '%s'", c.Script)
}()
err = <-errCh

View File

@ -82,6 +82,36 @@ func TestCheckMonitor_BadCmd(t *testing.T) {
expectStatus(t, "foobarbaz", structs.HealthCritical)
}
// TestCheckMonitor_Timeout starts a CheckMonitor whose script sleeps for one
// second while its Timeout is only 5ms, then verifies that after 50ms the
// notifier has recorded at least two updates for the check and that the
// check's recorded state is "critical".
func TestCheckMonitor_Timeout(t *testing.T) {
	const checkID = "foo"

	notify := &MockNotify{
		state:   map[string]string{},
		updates: map[string]int{},
		output:  map[string]string{},
	}
	monitor := &CheckMonitor{
		Notify:   notify,
		CheckID:  checkID,
		Script:   "sleep 1 && exit 0",
		Interval: 10 * time.Millisecond,
		Timeout:  5 * time.Millisecond,
		Logger:   log.New(os.Stderr, "", log.LstdFlags),
		ReapLock: &sync.RWMutex{},
	}
	monitor.Start()
	defer monitor.Stop()

	// Wait for 50ms, i.e. five times the configured interval, before
	// inspecting what the notifier recorded.
	time.Sleep(50 * time.Millisecond)

	// The monitor must have reported more than once in that window.
	if count := notify.updates[checkID]; count < 2 {
		t.Fatalf("should have at least 2 updates %v", notify.updates)
	}
	// The script outlives the timeout, so the recorded state must be critical.
	if status := notify.state[checkID]; status != "critical" {
		t.Fatalf("should be critical %v", notify.state)
	}
}
func TestCheckMonitor_RandomStagger(t *testing.T) {
mock := &MockNotify{
state: make(map[string]string),

View File

@ -22,6 +22,9 @@ There are five different kinds of checks:
generates some output. A script is paired with an invocation interval (e.g.
every 30 seconds). This is similar to the Nagios plugin system. The output of
a script check is limited to 4K. Output larger than this will be truncated.
By default, script checks time out after 30 seconds. To use a different
timeout, set the `timeout` field in the check definition.
* HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g.
every 30 seconds) to the specified URL. The status of the service depends on the HTTP response code:
@ -83,7 +86,8 @@ A script check:
"id": "mem-util",
"name": "Memory utilization",
"script": "/usr/local/bin/check_mem.py",
"interval": "10s"
"interval": "10s",
"timeout": "1s"
}
}
```