// Source: open-nomad/command/agent/metrics_endpoint_test.go (Go, 173 lines, 5.0 KiB)

package agent
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestHTTP_MetricsWithIllegalMethod checks that the metrics handler returns
// an error when it is called with an HTTP DELETE request.
func TestHTTP_MetricsWithIllegalMethod(t *testing.T) {
	ci.Parallel(t)

	httpTest(t, nil, func(agent *TestAgent) {
		request, reqErr := http.NewRequest("DELETE", "/v1/metrics", nil)
		assert.Nil(t, reqErr)

		recorder := httptest.NewRecorder()
		_, handlerErr := agent.Server.MetricsRequest(recorder, request)
		assert.NotNil(t, handlerErr, "HTTP DELETE should not be accepted for this endpoint")
	})
}
// TestHTTP_MetricsPrometheusDisabled verifies that a request for
// Prometheus-formatted metrics is rejected when Telemetry.PrometheusMetrics
// is set to false in the agent configuration.
func TestHTTP_MetricsPrometheusDisabled(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	httpTest(t, func(c *Config) { c.Telemetry.PrometheusMetrics = false }, func(s *TestAgent) {
		req, err := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil)
		assert.NoError(err)

		// No ResponseWriter is supplied: the handler is expected to return
		// the error before it would write anything.
		resp, err := s.Server.MetricsRequest(nil, req)
		assert.Nil(resp)

		// assert.Error only checks that err is non-nil; any extra arguments
		// are the failure message, not an expected value. Compare the error
		// text explicitly so an unrelated failure cannot satisfy this test.
		if assert.Error(err) {
			assert.Contains(err.Error(), "Prometheus is not enabled")
		}
	})
}
// TestHTTP_MetricsPrometheusEnabled checks that, with httpTest's unmodified
// configuration, a request for Prometheus-formatted metrics returns neither
// an object nor an error and writes the metrics into the response body.
func TestHTTP_MetricsPrometheusEnabled(t *testing.T) {
	ci.Parallel(t)

	httpTest(t, nil, func(agent *TestAgent) {
		request, reqErr := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil)
		assert.Nil(t, reqErr)

		recorder := httptest.NewRecorder()
		obj, handlerErr := agent.Server.MetricsRequest(recorder, request)
		assert.Nil(t, obj)
		assert.Nil(t, handlerErr)

		// The body must exist and carry a line that looks like one of the
		// metrics we expect in the Prometheus output.
		assert.NotNil(t, recorder.Body)
		body := recorder.Body.String()
		assert.Contains(t, body, "HELP go_gc_duration_seconds")
	})
}
// TestHTTP_Metrics verifies that the metrics endpoint eventually returns a
// metrics.MetricsSummary that contains at least one gauge.
func TestHTTP_Metrics(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	httpTest(t, nil, func(s *TestAgent) {
		// make a separate HTTP request first, to ensure Nomad has written metrics
		// and prevent a race condition
		req, err := http.NewRequest("GET", "/v1/agent/self", nil)
		assert.NoError(err)
		_, err = s.Server.AgentSelfRequest(httptest.NewRecorder(), req)
		assert.NoError(err)

		// now make a metrics endpoint request, which should be already initialized
		// and written to
		req, err = http.NewRequest("GET", "/v1/metrics", nil)
		assert.NoError(err)

		testutil.WaitForResult(func() (bool, error) {
			// Each attempt gets its own recorder so nothing written by a
			// previous attempt carries over.
			resp, err := s.Server.MetricsRequest(httptest.NewRecorder(), req)
			if err != nil {
				return false, err
			}

			// A wrong response type will not fix itself on retry; fail with
			// a readable message instead of panicking on the assertion.
			summary, ok := resp.(metrics.MetricsSummary)
			if !ok {
				t.Fatalf("expected metrics.MetricsSummary, got %T", resp)
			}
			return len(summary.Gauges) != 0, nil
		}, func(err error) {
			t.Fatalf("should have metrics: %v", err)
		})
	})
}
// When emitting metrics, the client should use the local copy of the allocs with
// updated task states (not the copy submitted by the server).
//
// Metrics are global, so gauges reported by nodes from other tests can show up
// in the summary; only gauges labelled with this agent's node ID are
// considered.
//
// NOTE(review): this comment used to say the test **cannot** be run in
// parallel, yet the body calls ci.Parallel. The call is left untouched;
// confirm that the node_id filter is sufficient isolation.
func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)
	numTasks := 10

	httpTest(t, func(c *Config) {
		c.Telemetry.PublishAllocationMetrics = true
		c.Telemetry.PublishNodeMetrics = true
	}, func(s *TestAgent) {
		// Create the job, wait for it to finish
		job := mock.BatchJob()
		job.TaskGroups[0].Count = numTasks
		testutil.RegisterJob(t, s.RPC, job)
		testutil.WaitForResult(func() (bool, error) {
			time.Sleep(200 * time.Millisecond)
			args := &structs.JobSpecificRequest{}
			args.JobID = job.ID
			args.QueryOptions.Region = "global"
			var resp structs.SingleJobResponse
			if err := s.RPC("Job.GetJob", args, &resp); err != nil {
				return false, err
			}
			// Guard against a response without a job so the retry loop
			// keeps polling instead of panicking on a nil dereference.
			if resp.Job == nil {
				return false, nil
			}
			return resp.Job.Status == "dead", nil
		}, func(err error) {
			require.Fail("timed-out waiting for job to complete", "last error: %v", err)
		})

		nodeID := s.client.NodeID()

		// wait for metrics to converge; -1 marks a gauge that has not been
		// observed yet
		var pending, running, terminal float32 = -1.0, -1.0, -1.0
		testutil.WaitForResultRetries(100, func() (bool, error) {
			time.Sleep(100 * time.Millisecond)
			req, err := http.NewRequest("GET", "/v1/metrics", nil)
			require.NoError(err)
			respW := httptest.NewRecorder()
			obj, err := s.Server.MetricsRequest(respW, req)
			if err != nil {
				return false, err
			}

			// Named "summary" rather than "metrics" so the imported
			// go-metrics package is not shadowed.
			summary, ok := obj.(metrics.MetricsSummary)
			require.True(ok, "expected metrics.MetricsSummary, got %T", obj)

			for _, g := range summary.Gauges {
				// ignore client metrics belonging to other test nodes
				// from other tests that contaminate go-metrics reporting
				if g.DisplayLabels["node_id"] != nodeID {
					continue
				}

				switch {
				case strings.HasSuffix(g.Name, "client.allocations.pending"):
					pending = g.Value
				case strings.HasSuffix(g.Name, "client.allocations.running"):
					running = g.Value
				case strings.HasSuffix(g.Name, "client.allocations.terminal"):
					terminal = g.Value
				}
			}

			// client alloc metrics should reflect that there is numTasks terminal allocs and no other allocs
			return pending == float32(0) && running == float32(0) &&
				terminal == float32(numTasks), nil
		}, func(err error) {
			require.Fail("timed out waiting for metrics to converge",
				"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v), last error: %v",
				numTasks, pending, running, terminal, err)
		})
	})
}