7b8cfee162
The test asserts that alloc counts get reported accurately in metrics by inspecting the metrics endpoint directly. Sadly, the metrics as collected by `armon/go-metrics` seem to be stateful and may contain info from other tests. This means that the test can fail depending on the order of the returned metrics. Inspecting the metrics output of one failing run, you can see duplicate gauge entries with the same name but different node_ids:
```
{ "Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal", "Value": 10, "Labels": { "datacenter": "dc1", "node_class": "none", "node_id": "67402bf4-00f3-bd8d-9fa8-f4d1924a892a" } },
{ "Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal", "Value": 0, "Labels": { "datacenter": "dc1", "node_class": "none", "node_id": "a2945b48-7e66-68e2-c922-49b20dd4e20c" } },
```
137 lines
4 KiB
Go
137 lines
4 KiB
Go
package agent
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/testutil"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestHTTP_MetricsWithIllegalMethod(t *testing.T) {
|
|
assert := assert.New(t)
|
|
|
|
t.Parallel()
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
req, err := http.NewRequest("DELETE", "/v1/metrics", nil)
|
|
assert.Nil(err)
|
|
respW := httptest.NewRecorder()
|
|
|
|
_, err = s.Server.MetricsRequest(respW, req)
|
|
assert.NotNil(err, "HTTP DELETE should not be accepted for this endpoint")
|
|
})
|
|
}
|
|
|
|
func TestHTTP_Metrics(t *testing.T) {
|
|
assert := assert.New(t)
|
|
|
|
t.Parallel()
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
// make a separate HTTP request first, to ensure Nomad has written metrics
|
|
// and prevent a race condition
|
|
req, err := http.NewRequest("GET", "/v1/agent/self", nil)
|
|
assert.Nil(err)
|
|
respW := httptest.NewRecorder()
|
|
s.Server.AgentSelfRequest(respW, req)
|
|
|
|
// now make a metrics endpoint request, which should be already initialized
|
|
// and written to
|
|
req, err = http.NewRequest("GET", "/v1/metrics", nil)
|
|
assert.Nil(err)
|
|
respW = httptest.NewRecorder()
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
resp, err := s.Server.MetricsRequest(respW, req)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
respW.Flush()
|
|
|
|
res := resp.(metrics.MetricsSummary)
|
|
return len(res.Gauges) != 0, nil
|
|
}, func(err error) {
|
|
t.Fatalf("should have metrics: %v", err)
|
|
})
|
|
})
|
|
}
|
|
|
|
// When emitting metrics, the client should use the local copy of the allocs with
|
|
// updated task states (not the copy submitted by the server).
|
|
func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
|
|
t.Parallel()
|
|
require := require.New(t)
|
|
numTasks := 10
|
|
|
|
httpTest(t, func(c *Config) {
|
|
c.Telemetry.PublishAllocationMetrics = true
|
|
c.Telemetry.PublishNodeMetrics = true
|
|
c.Telemetry.BackwardsCompatibleMetrics = false
|
|
c.Telemetry.DisableTaggedMetrics = false
|
|
}, func(s *TestAgent) {
|
|
// Create the job, wait for it to finish
|
|
job := mock.BatchJob()
|
|
job.TaskGroups[0].Count = numTasks
|
|
testutil.RegisterJob(t, s.RPC, job)
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
time.Sleep(200 * time.Millisecond)
|
|
args := &structs.JobSpecificRequest{}
|
|
args.JobID = job.ID
|
|
args.QueryOptions.Region = "global"
|
|
var resp structs.SingleJobResponse
|
|
err := s.RPC("Job.GetJob", args, &resp)
|
|
return err == nil && resp.Job.Status == "dead", err
|
|
}, func(err error) {
|
|
require.Fail("timed-out waiting for job to complete")
|
|
})
|
|
|
|
nodeID := s.client.NodeID()
|
|
|
|
// wait for metrics to converge
|
|
var pending, running, terminal float32 = -1.0, -1.0, -1.0
|
|
testutil.WaitForResultRetries(100, func() (bool, error) {
|
|
time.Sleep(100 * time.Millisecond)
|
|
req, err := http.NewRequest("GET", "/v1/metrics", nil)
|
|
require.NoError(err)
|
|
respW := httptest.NewRecorder()
|
|
|
|
obj, err := s.Server.MetricsRequest(respW, req)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
metrics := obj.(metrics.MetricsSummary)
|
|
for _, g := range metrics.Gauges {
|
|
|
|
// ignore client metrics belonging to other test nodes
|
|
// from other tests that contaminate go-metrics reporting
|
|
if g.DisplayLabels["node_id"] != nodeID {
|
|
continue
|
|
}
|
|
|
|
if strings.HasSuffix(g.Name, "client.allocations.pending") {
|
|
pending = g.Value
|
|
}
|
|
if strings.HasSuffix(g.Name, "client.allocations.running") {
|
|
running = g.Value
|
|
}
|
|
if strings.HasSuffix(g.Name, "client.allocations.terminal") {
|
|
terminal = g.Value
|
|
}
|
|
}
|
|
// client alloc metrics should reflect that there is numTasks terminal allocs and no other allocs
|
|
return pending == float32(0) && running == float32(0) &&
|
|
terminal == float32(numTasks), nil
|
|
}, func(err error) {
|
|
require.Fail("timed out waiting for metrics to converge",
|
|
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
|
|
})
|
|
})
|
|
}
|