diff --git a/.changelog/12905.txt b/.changelog/12905.txt new file mode 100644 index 000000000..7b2889f24 --- /dev/null +++ b/.changelog/12905.txt @@ -0,0 +1,3 @@ +```release-note:improvement +metrics: Service RPC call durations shorter than 1ms are now emitted as a decimal number of milliseconds instead of 0. +``` diff --git a/agent/metrics_test.go b/agent/metrics_test.go index fde404713..4b80f9eda 100644 --- a/agent/metrics_test.go +++ b/agent/metrics_test.go @@ -47,7 +47,7 @@ func assertMetricExists(t *testing.T, respRec *httptest.ResponseRecorder, metric } } -// assertMetricExistsWithLabels looks in the prometheus metrics reponse for the metric name and all the labels. eg: +// assertMetricExistsWithLabels looks in the prometheus metrics response for the metric name and all the labels. eg: // new_rpc_metrics_rpc_server_call{errored="false",method="Status.Ping",request_type="unknown",rpc_type="net/rpc"} func assertMetricExistsWithLabels(t *testing.T, respRec *httptest.ResponseRecorder, metric string, labelNames []string) { if respRec.Body.String() == "" { @@ -136,6 +136,28 @@ func assertMetricExistsWithValue(t *testing.T, respRec *httptest.ResponseRecorde } } +func assertMetricsWithLabelIsNonZero(t *testing.T, respRec *httptest.ResponseRecorder, label, labelValue string) { + if respRec.Body.String() == "" { + t.Fatalf("Response body is empty.") + } + + metrics := respRec.Body.String() + labelWithValueTarget := label + "=" + "\"" + labelValue + "\"" + + for _, line := range strings.Split(metrics, "\n") { + if len(line) < 1 || line[0] == '#' { + continue + } + + if strings.Contains(line, labelWithValueTarget) { + s := strings.SplitN(line, " ", 2) + if s[1] == "0" { + t.Fatalf("Metric with label provided \"%s:%s\" has the value 0", label, labelValue) + } + } + } +} + func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, metric string) { if respRec.Body.String() == "" { t.Fatalf("Response body is empty.") @@ -205,6 +227,8 @@ func TestAgent_OneTwelveRPCMetrics(t *testing.T) { 
assertMetricExistsWithLabels(t, respRec, metricsPrefix+"_rpc_server_call", []string{"errored", "method", "request_type", "rpc_type", "leader"}) // make sure we see 3 Status.Ping metrics corresponding to the calls we made above assertLabelWithValueForMetricExistsNTime(t, respRec, metricsPrefix+"_rpc_server_call", "method", "Status.Ping", 3) + // make sure rpc calls with elapsed time below 1ms are reported as decimal + assertMetricsWithLabelIsNonZero(t, respRec, "method", "Status.Ping") }) } diff --git a/agent/rpc/middleware/interceptors.go b/agent/rpc/middleware/interceptors.go index 049283ac2..6abcf0a44 100644 --- a/agent/rpc/middleware/interceptors.go +++ b/agent/rpc/middleware/interceptors.go @@ -49,7 +49,8 @@ func NewRequestRecorder(logger hclog.Logger, isLeader func() bool, localDC strin } func (r *RequestRecorder) Record(requestName string, rpcType string, start time.Time, request interface{}, respErrored bool) { - elapsed := time.Since(start).Milliseconds() + elapsed := time.Since(start).Microseconds() + elapsedMs := float32(elapsed) / 1000 reqType := requestType(request) isLeader := r.getServerLeadership() @@ -64,7 +65,7 @@ func (r *RequestRecorder) Record(requestName string, rpcType string, start time. labels = r.addOptionalLabels(request, labels) // math.MaxInt64 < math.MaxFloat32 is true so we should be good! - r.RecorderFunc(metricRPCRequest, float32(elapsed), labels) + r.RecorderFunc(metricRPCRequest, elapsedMs, labels) labelsArr := flattenLabels(labels) r.Logger.Trace(requestLogName, labelsArr...) 
diff --git a/website/content/docs/agent/telemetry.mdx b/website/content/docs/agent/telemetry.mdx index 3926ada34..21654025e 100644 --- a/website/content/docs/agent/telemetry.mdx +++ b/website/content/docs/agent/telemetry.mdx @@ -557,8 +557,6 @@ Label based RPC metrics were added in Consul 1.12.0 as a Beta feature to better | ------------------------------------- | --------------------------------------------------------- | ------ | --------- | | `consul.rpc.server.call` | Measures the elapsed time taken to complete an RPC call. | ms | summary | -Note that values of the `consul.rpc.server.call` may emit as `0 ms`. That means that the elapsed time < `1 ms`. - ### Labels The the server workload metrics above come with the following labels: