Merge pull request #1785 from hashicorp/f-check-put-api
Adds a PUT-based API for TTL checks and retains output on timeouts.
This commit is contained in:
commit
61afe388fa
|
@ -167,6 +167,58 @@ func (s *HTTPServer) AgentCheckFail(resp http.ResponseWriter, req *http.Request)
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// checkUpdate is the payload for a PUT to AgentCheckUpdate.
|
||||
type checkUpdate struct {
|
||||
// Status is one of the structs.Health* states, "passing", "warning", or
|
||||
// "critical".
|
||||
Status string
|
||||
|
||||
// Output is the information to post to the UI for operators as the
|
||||
// output of the process that decided to hit the TTL check. This is
|
||||
// different from the note field that's associated with the check
|
||||
// itself.
|
||||
Output string
|
||||
}
|
||||
|
||||
// AgentCheckUpdate is a PUT-based alternative to the GET-based Pass/Warn/Fail
|
||||
// APIs.
|
||||
func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
if req.Method != "PUT" {
|
||||
resp.WriteHeader(405)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var update checkUpdate
|
||||
if err := decodeBody(req, &update, nil); err != nil {
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte(fmt.Sprintf("Request decode failed: %v", err)))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch update.Status {
|
||||
case structs.HealthPassing:
|
||||
case structs.HealthWarning:
|
||||
case structs.HealthCritical:
|
||||
default:
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte(fmt.Sprintf("Invalid check status: '%s'", update.Status)))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
total := len(update.Output)
|
||||
if total > CheckBufSize {
|
||||
update.Output = fmt.Sprintf("%s ... (captured %d of %d bytes)",
|
||||
update.Output[:CheckBufSize], CheckBufSize, total)
|
||||
}
|
||||
|
||||
checkID := strings.TrimPrefix(req.URL.Path, "/v1/agent/check/update/")
|
||||
if err := s.agent.UpdateCheck(checkID, update.Status, update.Output); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.syncChanges()
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (s *HTTPServer) AgentRegisterService(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
var args ServiceDefinition
|
||||
// Fixup the type decode of TTL or Interval if a check is provided
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"net/http/httptest"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -428,7 +429,6 @@ func TestHTTPAgentPassCheck(t *testing.T) {
|
|||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Register node
|
||||
req, err := http.NewRequest("GET", "/v1/agent/check/pass/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
|
@ -461,7 +461,6 @@ func TestHTTPAgentWarnCheck(t *testing.T) {
|
|||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Register node
|
||||
req, err := http.NewRequest("GET", "/v1/agent/check/warn/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
|
@ -494,7 +493,6 @@ func TestHTTPAgentFailCheck(t *testing.T) {
|
|||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Register node
|
||||
req, err := http.NewRequest("GET", "/v1/agent/check/fail/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
|
@ -515,6 +513,134 @@ func TestHTTPAgentFailCheck(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgentUpdateCheck(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
chk := &structs.HealthCheck{Name: "test", CheckID: "test"}
|
||||
chkType := &CheckType{TTL: 15 * time.Second}
|
||||
if err := srv.agent.AddCheck(chk, chkType, false, ""); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
cases := []checkUpdate{
|
||||
checkUpdate{"passing", "hello-passing"},
|
||||
checkUpdate{"critical", "hello-critical"},
|
||||
checkUpdate{"warning", "hello-warning"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
req, err := http.NewRequest("PUT", "/v1/agent/check/update/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
req.Body = encodeReq(c)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.AgentCheckUpdate(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if obj != nil {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
state := srv.agent.state.Checks()["test"]
|
||||
if state.Status != c.Status || state.Output != c.Output {
|
||||
t.Fatalf("bad: %v", state)
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure abusive levels of output are capped.
|
||||
{
|
||||
req, err := http.NewRequest("PUT", "/v1/agent/check/update/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
update := checkUpdate{
|
||||
Status: "passing",
|
||||
Output: strings.Repeat("-= bad -=", 5*CheckBufSize),
|
||||
}
|
||||
req.Body = encodeReq(update)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.AgentCheckUpdate(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if obj != nil {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Since we append some notes about truncating, we just do a
|
||||
// rough check that the output buffer was cut down so this test
|
||||
// isn't super brittle.
|
||||
state := srv.agent.state.Checks()["test"]
|
||||
if state.Status != structs.HealthPassing || len(state.Output) > 2*CheckBufSize {
|
||||
t.Fatalf("bad: %v", state)
|
||||
}
|
||||
}
|
||||
|
||||
// Check a bogus status.
|
||||
{
|
||||
req, err := http.NewRequest("PUT", "/v1/agent/check/update/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
update := checkUpdate{
|
||||
Status: "itscomplicated",
|
||||
}
|
||||
req.Body = encodeReq(update)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.AgentCheckUpdate(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if obj != nil {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if resp.Code != 400 {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// Check a bogus verb.
|
||||
{
|
||||
req, err := http.NewRequest("POST", "/v1/agent/check/update/test", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
update := checkUpdate{
|
||||
Status: "passing",
|
||||
}
|
||||
req.Body = encodeReq(update)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.AgentCheckUpdate(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if obj != nil {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if resp.Code != 405 {
|
||||
t.Fatalf("expected 405, got %d", resp.Code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgentRegisterService(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
|
|
|
@ -232,6 +232,9 @@ type CheckTTL struct {
|
|||
|
||||
timer *time.Timer
|
||||
|
||||
lastOutput string
|
||||
lastOutputLock sync.RWMutex
|
||||
|
||||
stop bool
|
||||
stopCh chan struct{}
|
||||
stopLock sync.Mutex
|
||||
|
@ -265,7 +268,7 @@ func (c *CheckTTL) run() {
|
|||
case <-c.timer.C:
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical",
|
||||
c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, "TTL expired")
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical, c.getExpiredOutput())
|
||||
|
||||
case <-c.stopCh:
|
||||
return
|
||||
|
@ -273,12 +276,31 @@ func (c *CheckTTL) run() {
|
|||
}
|
||||
}
|
||||
|
||||
// getExpiredOutput formats the output for the case when the TTL is expired.
|
||||
func (c *CheckTTL) getExpiredOutput() string {
|
||||
c.lastOutputLock.RLock()
|
||||
defer c.lastOutputLock.RUnlock()
|
||||
|
||||
const prefix = "TTL expired"
|
||||
if c.lastOutput == "" {
|
||||
return prefix
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s (last output before timeout follows): %s", prefix, c.lastOutput)
|
||||
}
|
||||
|
||||
// SetStatus is used to update the status of the check,
|
||||
// and to renew the TTL. If expired, TTL is restarted.
|
||||
func (c *CheckTTL) SetStatus(status, output string) {
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v",
|
||||
c.CheckID, status)
|
||||
c.Notify.UpdateCheck(c.CheckID, status, output)
|
||||
|
||||
// Store the last output so we can retain it if the TTL expires.
|
||||
c.lastOutputLock.Lock()
|
||||
c.lastOutput = output
|
||||
c.lastOutputLock.Unlock()
|
||||
|
||||
c.timer.Reset(c.TTL)
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"net/http/httptest"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -150,7 +151,7 @@ func TestCheckTTL(t *testing.T) {
|
|||
defer check.Stop()
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
check.SetStatus(structs.HealthPassing, "")
|
||||
check.SetStatus(structs.HealthPassing, "test-output")
|
||||
|
||||
if mock.updates["foo"] != 1 {
|
||||
t.Fatalf("should have 1 updates %v", mock.updates)
|
||||
|
@ -176,6 +177,10 @@ func TestCheckTTL(t *testing.T) {
|
|||
if mock.state["foo"] != structs.HealthCritical {
|
||||
t.Fatalf("should be critical %v", mock.state)
|
||||
}
|
||||
|
||||
if !strings.Contains(mock.output["foo"], "test-output") {
|
||||
t.Fatalf("should have retained output %v", mock.output)
|
||||
}
|
||||
}
|
||||
|
||||
func mockHTTPServer(responseCode int) *httptest.Server {
|
||||
|
|
|
@ -232,6 +232,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
|
|||
s.mux.HandleFunc("/v1/agent/check/pass/", s.wrap(s.AgentCheckPass))
|
||||
s.mux.HandleFunc("/v1/agent/check/warn/", s.wrap(s.AgentCheckWarn))
|
||||
s.mux.HandleFunc("/v1/agent/check/fail/", s.wrap(s.AgentCheckFail))
|
||||
s.mux.HandleFunc("/v1/agent/check/update/", s.wrap(s.AgentCheckUpdate))
|
||||
|
||||
s.mux.HandleFunc("/v1/agent/service/register", s.wrap(s.AgentRegisterService))
|
||||
s.mux.HandleFunc("/v1/agent/service/deregister/", s.wrap(s.AgentDeregisterService))
|
||||
|
|
|
@ -25,9 +25,10 @@ The following endpoints are supported:
|
|||
* [`/v1/agent/force-leave/<node>`](#agent_force_leave) : Forces removal of a node
|
||||
* [`/v1/agent/check/register`](#agent_check_register) : Registers a new local check
|
||||
* [`/v1/agent/check/deregister/<checkID>`](#agent_check_deregister) : Deregisters a local check
|
||||
* [`/v1/agent/check/pass/<checkID>`](#agent_check_pass) : Marks a local test as passing
|
||||
* [`/v1/agent/check/warn/<checkID>`](#agent_check_warn) : Marks a local test as warning
|
||||
* [`/v1/agent/check/fail/<checkID>`](#agent_check_fail) : Marks a local test as critical
|
||||
* [`/v1/agent/check/pass/<checkID>`](#agent_check_pass) : Marks a local check as passing
|
||||
* [`/v1/agent/check/warn/<checkID>`](#agent_check_warn) : Marks a local check as warning
|
||||
* [`/v1/agent/check/fail/<checkID>`](#agent_check_fail) : Marks a local check as critical
|
||||
* [`/v1/agent/check/update/<checkID>`](#agent_check_update) : Updates a local check
|
||||
* [`/v1/agent/service/register`](#agent_service_register) : Registers a new local service
|
||||
* [`/v1/agent/service/deregister/<serviceID>`](#agent_service_deregister) : Deregisters a local service
|
||||
* [`/v1/agent/service/maintenance/<serviceID>`](#agent_service_maintenance) : Manages service maintenance mode
|
||||
|
@ -310,8 +311,9 @@ This endpoint is used with a check that is of the [TTL type](/docs/agent/checks.
|
|||
When this endpoint is accessed via a GET, the status of the check is set to `passing`
|
||||
and the TTL clock is reset.
|
||||
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check.
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check. This will be passed through to the check's `Output` field
|
||||
in the check endpoints.
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
|
@ -321,8 +323,9 @@ This endpoint is used with a check that is of the [TTL type](/docs/agent/checks.
|
|||
When this endpoint is accessed via a GET, the status of the check is set to `warning`,
|
||||
and the TTL clock is reset.
|
||||
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check.
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check. This will be passed through to the check's `Output` field
|
||||
in the check endpoints.
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
|
@ -332,8 +335,33 @@ This endpoint is used with a check that is of the [TTL type](/docs/agent/checks.
|
|||
When this endpoint is accessed via a GET, the status of the check is set to `critical`,
|
||||
and the TTL clock is reset.
|
||||
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check.
|
||||
The optional "?note=" query parameter can be used to associate a human-readable message
|
||||
with the status of the check. This will be passed through to the check's `Output` field
|
||||
in the check endpoints.
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
### <a name="agent_check_update"></a> /v1/agent/check/update/\<checkId\>
|
||||
|
||||
This endpoint is used with a check that is of the [TTL type](/docs/agent/checks.html).
|
||||
When this endpoint is accessed with a PUT, the status and output of the check are
|
||||
updated and the TTL clock is reset.
|
||||
|
||||
This endpoint expects a JSON request body to be sent with the PUT. The request body must look like:
|
||||
|
||||
```javascript
|
||||
{
|
||||
"Status": "passing",
|
||||
"Output": "curl reported a failure:\n\n..."
|
||||
}
|
||||
```
|
||||
|
||||
The `Status` field is mandatory, and must be set to "passing", "warning", or "critical".
|
||||
|
||||
`Output` is an optional field that will associate a human-readable message with the status
|
||||
of the check, such as the output of the checking script or process. This will be truncated
|
||||
if it exceeds 4KB in size. This will be passed through to the check's `Output` field in the
|
||||
check endpoints.
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
|
|
Loading…
Reference in a new issue