fix autopilot_failure_tolerance, add autopilot metrics test case (#11399)
Signed-off-by: FFMMM <FFMMM@users.noreply.github.com>
This commit is contained in:
parent
96afd767a1
commit
6433a57d3c
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
telemetry: fixes a bug where the Prometheus `consul_autopilot_failure_tolerance` metric reported 0 instead of NaN on follower servers.
|
||||||
|
```
|
|
@ -59,6 +59,9 @@ func (d *AutopilotDelegate) NotifyState(state *autopilot.State) {
|
||||||
// https://www.consul.io/docs/agent/telemetry#autopilot
|
// https://www.consul.io/docs/agent/telemetry#autopilot
|
||||||
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
||||||
|
|
||||||
|
// also emit NaN for failure tolerance to be backwards compatible
|
||||||
|
metrics.SetGauge([]string{"autopilot", "failure_tolerance"}, float32(math.NaN()))
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,6 +86,7 @@ func (s *Server) initAutopilot(config *Config) {
|
||||||
)
|
)
|
||||||
|
|
||||||
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
||||||
|
metrics.SetGauge([]string{"autopilot", "failure_tolerance"}, float32(math.NaN()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server {
|
func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server {
|
||||||
|
|
|
@ -1,53 +1,97 @@
|
||||||
package agent
|
package agent
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestHTTPHandlers_AgentMetrics_ConsulAutopilotHealthy_Prometheus adds testing around
|
|
||||||
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
|
||||||
func TestHTTPHandlers_AgentMetrics_ConsulAutopilotHealthy_Prometheus(t *testing.T) {
|
|
||||||
checkForShortTesting(t)
|
|
||||||
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
|
||||||
|
|
||||||
// don't bootstrap agent so as not to
|
|
||||||
// become a leader
|
|
||||||
hcl := `
|
|
||||||
telemetry = {
|
|
||||||
prometheus_retention_time = "5s",
|
|
||||||
disable_hostname = true
|
|
||||||
}
|
|
||||||
bootstrap = false
|
|
||||||
`
|
|
||||||
|
|
||||||
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
|
||||||
defer a.Shutdown()
|
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to generate new http request. err: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
respRec := httptest.NewRecorder()
|
|
||||||
_, err = a.srv.AgentMetrics(respRec, req)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to serve agent metrics. err: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Run("Check consul_autopilot_healthy metric value on startup", func(t *testing.T) {
|
|
||||||
target := "consul_autopilot_healthy NaN"
|
|
||||||
keyValue := strings.Split(target, " ")
|
|
||||||
if !strings.Contains(respRec.Body.String(), target) {
|
|
||||||
t.Fatalf("Could not find the metric \"%s\" with value \"%s\" in the /v1/agent/metrics response", keyValue[0], keyValue[1])
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// checkForShortTesting skips the calling test when the test binary is run
// in short mode (testing.Short reports true).
func checkForShortTesting(t *testing.T) {
	// Mark this function as a helper so the skip is attributed to the
	// calling test's line instead of this one.
	t.Helper()
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}
}
|
||||||
|
|
||||||
|
func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
|
||||||
|
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
||||||
|
require.NoError(t, err, "Failed to generate new http request.")
|
||||||
|
|
||||||
|
_, err = a.srv.AgentMetrics(respRec, req)
|
||||||
|
require.NoError(t, err, "Failed to serve agent metrics")
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertMetricExistsWithValue fails the test unless the recorded response
// body contains a sample line that begins with "<metric> <value>", followed
// either by the end of the line or by whitespace (for example a trailing
// timestamp). It also fails when the body is empty.
//
// Matching is anchored to whole lines: a plain substring search would accept
// "metric 0" for a body containing "metric 0.5", or a different metric whose
// name merely ends in metric.
func assertMetricExistsWithValue(t *testing.T, respRec *httptest.ResponseRecorder, metric string, value string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	body := respRec.Body.String()
	if body == "" {
		t.Fatal("Response body is empty.")
	}

	// eg "consul_autopilot_healthy NaN"
	target := metric + " " + value

	for _, line := range strings.Split(body, "\n") {
		// TrimSpace also drops a trailing "\r" and any indentation.
		line = strings.TrimSpace(line)
		if line == target || strings.HasPrefix(line, target+" ") {
			return
		}
	}

	t.Fatalf("Could not find the metric \"%s\" with value \"%s\" in the /v1/agent/metrics response", metric, value)
}
|
||||||
|
|
||||||
|
// assertMetricNotExists fails the test if the recorded response body is
// empty or if metric occurs anywhere in it.
//
// The check is a plain substring search over the whole body, so comment
// lines and longer metric names that contain metric also count as a match.
func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, metric string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	body := respRec.Body.String()
	if body == "" {
		// An empty body would make the absence check pass without
		// checking anything, so it is treated as a failure.
		t.Fatal("Response body is empty.")
	}

	if strings.Contains(body, metric) {
		t.Fatalf("Didn't expect to find the metric \"%s\" in the /v1/agent/metrics response", metric)
	}
}
|
||||||
|
|
||||||
|
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
|
||||||
|
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
||||||
|
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
||||||
|
checkForShortTesting(t)
|
||||||
|
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||||
|
|
||||||
|
t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
|
||||||
|
hcl := `
|
||||||
|
telemetry = {
|
||||||
|
prometheus_retention_time = "5s"
|
||||||
|
disable_hostname = true
|
||||||
|
metrics_prefix = "agent_1"
|
||||||
|
}
|
||||||
|
bootstrap = false
|
||||||
|
server = false
|
||||||
|
`
|
||||||
|
|
||||||
|
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
||||||
|
defer a.Shutdown()
|
||||||
|
|
||||||
|
respRec := httptest.NewRecorder()
|
||||||
|
recordPromMetrics(t, a, respRec)
|
||||||
|
|
||||||
|
assertMetricNotExists(t, respRec, "agent_1_autopilot_healthy")
|
||||||
|
assertMetricNotExists(t, respRec, "agent_1_autopilot_failure_tolerance")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Check consul_autopilot_healthy metric value on startup", func(t *testing.T) {
|
||||||
|
// don't bootstrap agent so as not to
|
||||||
|
// become a leader
|
||||||
|
hcl := `
|
||||||
|
telemetry = {
|
||||||
|
prometheus_retention_time = "5s",
|
||||||
|
disable_hostname = true
|
||||||
|
metrics_prefix = "agent_2"
|
||||||
|
}
|
||||||
|
bootstrap = false
|
||||||
|
`
|
||||||
|
|
||||||
|
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
||||||
|
defer a.Shutdown()
|
||||||
|
|
||||||
|
respRec := httptest.NewRecorder()
|
||||||
|
recordPromMetrics(t, a, respRec)
|
||||||
|
|
||||||
|
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_healthy", "NaN")
|
||||||
|
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue