Merge pull request #3683 from hashicorp/check-logging-redux
Cleans up check logging.
This commit is contained in:
commit
d7ec5aca7c
|
@ -103,16 +103,13 @@ func (c *CheckMonitor) check() {
|
|||
// Create the command
|
||||
var cmd *osexec.Cmd
|
||||
var err error
|
||||
var cmdDisplay string
|
||||
if len(c.ScriptArgs) > 0 {
|
||||
cmdDisplay = fmt.Sprintf("%v", c.ScriptArgs)
|
||||
cmd, err = exec.Subprocess(c.ScriptArgs)
|
||||
} else {
|
||||
cmdDisplay = c.Script
|
||||
cmd, err = exec.Script(c.Script)
|
||||
}
|
||||
if err != nil {
|
||||
c.Logger.Printf("[ERR] agent: failed to setup invoke '%s': %s", cmdDisplay, err)
|
||||
c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
|
@ -129,14 +126,13 @@ func (c *CheckMonitor) check() {
|
|||
outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
|
||||
output.Size(), output.TotalWritten(), outputStr)
|
||||
}
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s",
|
||||
c.CheckID, cmdDisplay, outputStr)
|
||||
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr)
|
||||
return outputStr
|
||||
}
|
||||
|
||||
// Start the check
|
||||
if err := cmd.Start(); err != nil {
|
||||
c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", cmdDisplay, err)
|
||||
c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
|
@ -154,11 +150,11 @@ func (c *CheckMonitor) check() {
|
|||
select {
|
||||
case <-time.After(timeout):
|
||||
if err := exec.KillCommandSubtree(cmd); err != nil {
|
||||
c.Logger.Printf("[WARN] Failed to kill check '%s' after timeout: %v", cmdDisplay, err)
|
||||
c.Logger.Printf("[WARN] Check %q failed to kill after timeout: %s", c.CheckID, err)
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
|
||||
c.Logger.Printf("[WARN] %s '%s'", msg, cmdDisplay)
|
||||
c.Logger.Printf("[WARN] Check %q: %s", c.CheckID, msg)
|
||||
|
||||
outputStr := truncateAndLogOutput()
|
||||
if len(outputStr) > 0 {
|
||||
|
@ -178,7 +174,7 @@ func (c *CheckMonitor) check() {
|
|||
// Check if the check passed
|
||||
outputStr := truncateAndLogOutput()
|
||||
if err == nil {
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
||||
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
|
||||
return
|
||||
}
|
||||
|
@ -189,7 +185,7 @@ func (c *CheckMonitor) check() {
|
|||
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
||||
code := status.ExitStatus()
|
||||
if code == 1 {
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
|
||||
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
|
||||
return
|
||||
}
|
||||
|
@ -197,7 +193,7 @@ func (c *CheckMonitor) check() {
|
|||
}
|
||||
|
||||
// Set the health as critical
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
||||
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
|
||||
}
|
||||
|
||||
|
@ -247,7 +243,7 @@ func (c *CheckTTL) run() {
|
|||
for {
|
||||
select {
|
||||
case <-c.timer.C:
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' missed TTL, is now critical",
|
||||
c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical",
|
||||
c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())
|
||||
|
||||
|
@ -273,8 +269,7 @@ func (c *CheckTTL) getExpiredOutput() string {
|
|||
// SetStatus is used to update the status of the check,
|
||||
// and to renew the TTL. If expired, TTL is restarted.
|
||||
func (c *CheckTTL) SetStatus(status, output string) {
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%v' status is now %v",
|
||||
c.CheckID, status)
|
||||
c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
|
||||
c.Notify.UpdateCheck(c.CheckID, status, output)
|
||||
|
||||
// Store the last output so we can retain it if the TTL expires.
|
||||
|
@ -358,7 +353,6 @@ func (c *CheckHTTP) Stop() {
|
|||
func (c *CheckHTTP) run() {
|
||||
// Get the randomized initial pause time
|
||||
initialPauseTime := lib.RandomStagger(c.Interval)
|
||||
c.Logger.Printf("[DEBUG] agent: pausing %v before first HTTP request of %s", initialPauseTime, c.HTTP)
|
||||
next := time.After(initialPauseTime)
|
||||
for {
|
||||
select {
|
||||
|
@ -380,7 +374,7 @@ func (c *CheckHTTP) check() {
|
|||
|
||||
req, err := http.NewRequest(method, c.HTTP, nil)
|
||||
if err != nil {
|
||||
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
|
||||
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
|
@ -405,7 +399,7 @@ func (c *CheckHTTP) check() {
|
|||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
c.Logger.Printf("[WARN] agent: http request failed '%s': %s", c.HTTP, err)
|
||||
c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
|
@ -414,7 +408,7 @@ func (c *CheckHTTP) check() {
|
|||
// Read the response into a circular buffer to limit the size
|
||||
output, _ := circbuf.NewBuffer(BufSize)
|
||||
if _, err := io.Copy(output, resp.Body); err != nil {
|
||||
c.Logger.Printf("[WARN] agent: Check '%v': Get error while reading body: %s", c.CheckID, err)
|
||||
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
|
||||
}
|
||||
|
||||
// Format the response body
|
||||
|
@ -422,19 +416,19 @@ func (c *CheckHTTP) check() {
|
|||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
|
||||
// PASSING (2xx)
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
||||
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)
|
||||
|
||||
} else if resp.StatusCode == 429 {
|
||||
// WARNING
|
||||
// 429 Too Many Requests (RFC 6585)
|
||||
// The user has sent too many requests in a given amount of time.
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' is now warning", c.CheckID)
|
||||
c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)
|
||||
|
||||
} else {
|
||||
// CRITICAL
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
||||
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
|
||||
}
|
||||
}
|
||||
|
@ -496,7 +490,6 @@ func (c *CheckTCP) Stop() {
|
|||
func (c *CheckTCP) run() {
|
||||
// Get the randomized initial pause time
|
||||
initialPauseTime := lib.RandomStagger(c.Interval)
|
||||
c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
|
||||
next := time.After(initialPauseTime)
|
||||
for {
|
||||
select {
|
||||
|
@ -513,12 +506,12 @@ func (c *CheckTCP) run() {
|
|||
func (c *CheckTCP) check() {
|
||||
conn, err := c.dialer.Dial(`tcp`, c.TCP)
|
||||
if err != nil {
|
||||
c.Logger.Printf("[WARN] agent: socket connection failed '%s': %s", c.TCP, err)
|
||||
c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
|
||||
return
|
||||
}
|
||||
conn.Close()
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%v' is passing", c.CheckID)
|
||||
c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
|
||||
}
|
||||
|
||||
|
@ -585,7 +578,7 @@ func (c *CheckDocker) check() {
|
|||
var out string
|
||||
status, b, err := c.doCheck()
|
||||
if err != nil {
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%s': %s", c.CheckID, err)
|
||||
c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err)
|
||||
out = err.Error()
|
||||
} else {
|
||||
// out is already limited to CheckBufSize since we're getting a
|
||||
|
@ -595,11 +588,11 @@ func (c *CheckDocker) check() {
|
|||
if int(b.TotalWritten()) > len(out) {
|
||||
out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
|
||||
}
|
||||
c.Logger.Printf("[DEBUG] agent: Check '%s' script '%s' output: %s", c.CheckID, c.Script, out)
|
||||
c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out)
|
||||
}
|
||||
|
||||
if status == api.HealthCritical {
|
||||
c.Logger.Printf("[WARN] agent: Check '%v' is now critical", c.CheckID)
|
||||
c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
|
||||
}
|
||||
|
||||
c.Notify.UpdateCheck(c.CheckID, status, out)
|
||||
|
@ -632,10 +625,10 @@ func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) {
|
|||
case 0:
|
||||
return api.HealthPassing, buf, nil
|
||||
case 1:
|
||||
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
|
||||
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
|
||||
return api.HealthWarning, buf, nil
|
||||
default:
|
||||
c.Logger.Printf("[DEBUG] Check failed with exit code: %d", exitCode)
|
||||
c.Logger.Printf("[DEBUG] Check %q failed with exit code: %d", c.CheckID, exitCode)
|
||||
return api.HealthCritical, buf, nil
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue