a40477a7b8
* client/metrics: modified metrics to use (updated) client copy of allocation instead of (unupdated) server copy * updated armon/go-metrics to address race condition in DisplayMetrics
128 lines
3.7 KiB
Go
128 lines
3.7 KiB
Go
package agent
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/testutil"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestHTTP_MetricsWithIllegalMethod(t *testing.T) {
|
|
assert := assert.New(t)
|
|
|
|
t.Parallel()
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
req, err := http.NewRequest("DELETE", "/v1/metrics", nil)
|
|
assert.Nil(err)
|
|
respW := httptest.NewRecorder()
|
|
|
|
_, err = s.Server.MetricsRequest(respW, req)
|
|
assert.NotNil(err, "HTTP DELETE should not be accepted for this endpoint")
|
|
})
|
|
}
|
|
|
|
func TestHTTP_Metrics(t *testing.T) {
|
|
assert := assert.New(t)
|
|
|
|
t.Parallel()
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
// make a separate HTTP request first, to ensure Nomad has written metrics
|
|
// and prevent a race condition
|
|
req, err := http.NewRequest("GET", "/v1/agent/self", nil)
|
|
assert.Nil(err)
|
|
respW := httptest.NewRecorder()
|
|
s.Server.AgentSelfRequest(respW, req)
|
|
|
|
// now make a metrics endpoint request, which should be already initialized
|
|
// and written to
|
|
req, err = http.NewRequest("GET", "/v1/metrics", nil)
|
|
assert.Nil(err)
|
|
respW = httptest.NewRecorder()
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
resp, err := s.Server.MetricsRequest(respW, req)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
respW.Flush()
|
|
|
|
res := resp.(metrics.MetricsSummary)
|
|
return len(res.Gauges) != 0, nil
|
|
}, func(err error) {
|
|
t.Fatalf("should have metrics: %v", err)
|
|
})
|
|
})
|
|
}
|
|
|
|
// When emitting metrics, the client should use the local copy of the allocs with
|
|
// updated task states (not the copy submitted by the server).
|
|
func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
|
|
t.Parallel()
|
|
require := require.New(t)
|
|
numTasks := 10
|
|
|
|
httpTest(t, func(c *Config) {
|
|
c.Telemetry.PublishAllocationMetrics = true
|
|
c.Telemetry.PublishNodeMetrics = true
|
|
c.Telemetry.BackwardsCompatibleMetrics = false
|
|
c.Telemetry.DisableTaggedMetrics = false
|
|
}, func(s *TestAgent) {
|
|
// Create the job, wait for it to finish
|
|
job := mock.BatchJob()
|
|
job.TaskGroups[0].Count = numTasks
|
|
testutil.RegisterJob(t, s.RPC, job)
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
time.Sleep(200 * time.Millisecond)
|
|
args := &structs.JobSpecificRequest{}
|
|
args.JobID = job.ID
|
|
args.QueryOptions.Region = "global"
|
|
var resp structs.SingleJobResponse
|
|
err := s.RPC("Job.GetJob", args, &resp)
|
|
return err == nil && resp.Job.Status == "dead", err
|
|
}, func(err error) {
|
|
require.Fail("timed-out waiting for job to complete")
|
|
})
|
|
|
|
// wait for metrics to converge
|
|
var pending, running, terminal float32 = -1.0, -1.0, -1.0
|
|
testutil.WaitForResultRetries(100, func() (bool, error) {
|
|
time.Sleep(100 * time.Millisecond)
|
|
req, err := http.NewRequest("GET", "/v1/metrics", nil)
|
|
require.NoError(err)
|
|
respW := httptest.NewRecorder()
|
|
|
|
obj, err := s.Server.MetricsRequest(respW, req)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
metrics := obj.(metrics.MetricsSummary)
|
|
for _, g := range metrics.Gauges {
|
|
if strings.HasSuffix(g.Name, "client.allocations.pending") {
|
|
pending = g.Value
|
|
}
|
|
if strings.HasSuffix(g.Name, "client.allocations.running") {
|
|
running = g.Value
|
|
}
|
|
if strings.HasSuffix(g.Name, "client.allocations.terminal") {
|
|
terminal = g.Value
|
|
}
|
|
}
|
|
// client alloc metrics should reflect that there is numTasks terminal allocs and no other allocs
|
|
return pending == float32(0) && running == float32(0) &&
|
|
terminal == float32(numTasks), nil
|
|
}, func(err error) {
|
|
require.Fail("timed out waiting for metrics to converge",
|
|
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
|
|
})
|
|
})
|
|
}
|