2023-04-10 15:36:59 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2017-09-04 03:50:05 +00:00
|
|
|
package agent
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
|
|
|
"net/http/httptest"
|
2019-04-30 14:31:35 +00:00
|
|
|
"strings"
|
2017-09-04 03:50:05 +00:00
|
|
|
"testing"
|
2019-04-30 14:31:35 +00:00
|
|
|
"time"
|
2017-09-04 03:50:05 +00:00
|
|
|
|
2019-04-30 14:31:35 +00:00
|
|
|
"github.com/armon/go-metrics"
|
2022-03-15 12:42:43 +00:00
|
|
|
"github.com/hashicorp/nomad/ci"
|
2019-04-30 14:31:35 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2017-10-24 01:38:36 +00:00
|
|
|
"github.com/hashicorp/nomad/testutil"
|
2017-09-04 03:50:05 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2019-04-30 14:31:35 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2017-09-04 03:50:05 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestHTTP_MetricsWithIllegalMethod(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2017-09-04 03:50:05 +00:00
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
|
|
req, err := http.NewRequest("DELETE", "/v1/metrics", nil)
|
|
|
|
assert.Nil(err)
|
|
|
|
respW := httptest.NewRecorder()
|
|
|
|
|
|
|
|
_, err = s.Server.MetricsRequest(respW, req)
|
|
|
|
assert.NotNil(err, "HTTP DELETE should not be accepted for this endpoint")
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-03-09 14:28:58 +00:00
|
|
|
func TestHTTP_MetricsPrometheusDisabled(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2021-03-09 14:28:58 +00:00
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
httpTest(t, func(c *Config) { c.Telemetry.PrometheusMetrics = false }, func(s *TestAgent) {
|
|
|
|
req, err := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil)
|
|
|
|
assert.Nil(err)
|
|
|
|
|
|
|
|
resp, err := s.Server.MetricsRequest(nil, req)
|
|
|
|
assert.Nil(resp)
|
|
|
|
assert.Error(err, "Prometheus is not enabled")
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestHTTP_MetricsPrometheusEnabled(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2021-03-09 14:28:58 +00:00
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
|
|
req, err := http.NewRequest("GET", "/v1/metrics?format=prometheus", nil)
|
|
|
|
assert.Nil(err)
|
|
|
|
respW := httptest.NewRecorder()
|
|
|
|
|
|
|
|
resp, err := s.Server.MetricsRequest(respW, req)
|
|
|
|
assert.Nil(resp)
|
|
|
|
assert.Nil(err)
|
|
|
|
|
|
|
|
// Ensure the response body is not empty and that it contains something
|
|
|
|
// that looks like a metric we expect.
|
|
|
|
assert.NotNil(respW.Body)
|
|
|
|
assert.Contains(respW.Body.String(), "HELP go_gc_duration_seconds")
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2017-09-04 03:50:05 +00:00
|
|
|
func TestHTTP_Metrics(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2017-09-04 03:50:05 +00:00
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
httpTest(t, nil, func(s *TestAgent) {
|
|
|
|
// make a separate HTTP request first, to ensure Nomad has written metrics
|
|
|
|
// and prevent a race condition
|
|
|
|
req, err := http.NewRequest("GET", "/v1/agent/self", nil)
|
|
|
|
assert.Nil(err)
|
|
|
|
respW := httptest.NewRecorder()
|
|
|
|
s.Server.AgentSelfRequest(respW, req)
|
|
|
|
|
|
|
|
// now make a metrics endpoint request, which should be already initialized
|
|
|
|
// and written to
|
|
|
|
req, err = http.NewRequest("GET", "/v1/metrics", nil)
|
|
|
|
assert.Nil(err)
|
|
|
|
respW = httptest.NewRecorder()
|
|
|
|
|
2017-10-24 01:38:36 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
resp, err := s.Server.MetricsRequest(respW, req)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
respW.Flush()
|
|
|
|
|
|
|
|
res := resp.(metrics.MetricsSummary)
|
|
|
|
return len(res.Gauges) != 0, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("should have metrics: %v", err)
|
|
|
|
})
|
2017-09-04 03:50:05 +00:00
|
|
|
})
|
|
|
|
}
|
2019-04-30 14:31:35 +00:00
|
|
|
|
|
|
|
// When emitting metrics, the client should use the local copy of the allocs with
|
|
|
|
// updated task states (not the copy submitted by the server).
|
2020-02-07 23:39:06 +00:00
|
|
|
//
|
|
|
|
// **Cannot** be run in parallel as metrics are global.
|
2019-04-30 14:31:35 +00:00
|
|
|
func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
|
|
|
|
2019-04-30 14:31:35 +00:00
|
|
|
require := require.New(t)
|
|
|
|
numTasks := 10
|
|
|
|
|
|
|
|
httpTest(t, func(c *Config) {
|
|
|
|
c.Telemetry.PublishAllocationMetrics = true
|
|
|
|
c.Telemetry.PublishNodeMetrics = true
|
|
|
|
}, func(s *TestAgent) {
|
|
|
|
// Create the job, wait for it to finish
|
|
|
|
job := mock.BatchJob()
|
|
|
|
job.TaskGroups[0].Count = numTasks
|
|
|
|
testutil.RegisterJob(t, s.RPC, job)
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
time.Sleep(200 * time.Millisecond)
|
|
|
|
args := &structs.JobSpecificRequest{}
|
|
|
|
args.JobID = job.ID
|
|
|
|
args.QueryOptions.Region = "global"
|
|
|
|
var resp structs.SingleJobResponse
|
|
|
|
err := s.RPC("Job.GetJob", args, &resp)
|
|
|
|
return err == nil && resp.Job.Status == "dead", err
|
|
|
|
}, func(err error) {
|
|
|
|
require.Fail("timed-out waiting for job to complete")
|
|
|
|
})
|
|
|
|
|
tests: deflake TestHTTP_FreshClientAllocMetrics
The test asserts that alloc counts get reported accurately in metrics by
inspecting the metrics endpoint directly. Sadly, the metrics as
collected by `armon/go-metrics` seem to be stateful and may contain info
from other tests.
This means that the test can fail depending on the order of returned
metrics.
Inspecting the metrics output of one failing run, you can see the
duplicate guage entries but for different node_ids:
```
{
"Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal",
"Value": 10,
"Labels": {
"datacenter": "dc1",
"node_class": "none",
"node_id": "67402bf4-00f3-bd8d-9fa8-f4d1924a892a"
}
},
{
"Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal",
"Value": 0,
"Labels": {
"datacenter": "dc1",
"node_class": "none",
"node_id": "a2945b48-7e66-68e2-c922-49b20dd4e20c"
}
},
```
2019-11-22 23:41:21 +00:00
|
|
|
nodeID := s.client.NodeID()
|
|
|
|
|
2019-04-30 14:31:35 +00:00
|
|
|
// wait for metrics to converge
|
|
|
|
var pending, running, terminal float32 = -1.0, -1.0, -1.0
|
|
|
|
testutil.WaitForResultRetries(100, func() (bool, error) {
|
|
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
req, err := http.NewRequest("GET", "/v1/metrics", nil)
|
|
|
|
require.NoError(err)
|
|
|
|
respW := httptest.NewRecorder()
|
|
|
|
|
|
|
|
obj, err := s.Server.MetricsRequest(respW, req)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
metrics := obj.(metrics.MetricsSummary)
|
|
|
|
for _, g := range metrics.Gauges {
|
tests: deflake TestHTTP_FreshClientAllocMetrics
The test asserts that alloc counts get reported accurately in metrics by
inspecting the metrics endpoint directly. Sadly, the metrics as
collected by `armon/go-metrics` seem to be stateful and may contain info
from other tests.
This means that the test can fail depending on the order of returned
metrics.
Inspecting the metrics output of one failing run, you can see the
duplicate guage entries but for different node_ids:
```
{
"Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal",
"Value": 10,
"Labels": {
"datacenter": "dc1",
"node_class": "none",
"node_id": "67402bf4-00f3-bd8d-9fa8-f4d1924a892a"
}
},
{
"Name": "service-name.default-0a3ba4b6-2109-485e-be74-6864228aed3d.client.allocations.terminal",
"Value": 0,
"Labels": {
"datacenter": "dc1",
"node_class": "none",
"node_id": "a2945b48-7e66-68e2-c922-49b20dd4e20c"
}
},
```
2019-11-22 23:41:21 +00:00
|
|
|
|
|
|
|
// ignore client metrics belonging to other test nodes
|
|
|
|
// from other tests that contaminate go-metrics reporting
|
|
|
|
if g.DisplayLabels["node_id"] != nodeID {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-04-30 14:31:35 +00:00
|
|
|
if strings.HasSuffix(g.Name, "client.allocations.pending") {
|
|
|
|
pending = g.Value
|
|
|
|
}
|
|
|
|
if strings.HasSuffix(g.Name, "client.allocations.running") {
|
|
|
|
running = g.Value
|
|
|
|
}
|
|
|
|
if strings.HasSuffix(g.Name, "client.allocations.terminal") {
|
|
|
|
terminal = g.Value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// client alloc metrics should reflect that there is numTasks terminal allocs and no other allocs
|
|
|
|
return pending == float32(0) && running == float32(0) &&
|
|
|
|
terminal == float32(numTasks), nil
|
|
|
|
}, func(err error) {
|
|
|
|
require.Fail("timed out waiting for metrics to converge",
|
2019-09-19 02:17:42 +00:00
|
|
|
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
|
2019-04-30 14:31:35 +00:00
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|