gRPC server metrics (#14922)

* Move stats.go from grpc-internal to grpc-middleware
* Update grpc server metrics with server type label
* Add stats test to grpc-external
* Remove global metrics instance from grpc server tests
This commit is contained in:
Paul Glass 2022-10-11 17:00:32 -05:00 committed by GitHub
parent 5eb5fe41c1
commit 8cf430140a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 305 additions and 150 deletions

3
.changelog/14922.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
grpc: Added metrics for external gRPC server. Added `server_type=internal|external` label to gRPC metrics.
```

View File

@ -553,7 +553,10 @@ func (a *Agent) Start(ctx context.Context) error {
// This needs to happen after the initial auto-config is loaded, because TLS // This needs to happen after the initial auto-config is loaded, because TLS
// can only be configured on the gRPC server at the point of creation. // can only be configured on the gRPC server at the point of creation.
a.externalGRPCServer = external.NewServer(a.logger.Named("grpc.external")) a.externalGRPCServer = external.NewServer(
a.logger.Named("grpc.external"),
metrics.Default(),
)
if err := a.startLicenseManager(ctx); err != nil { if err := a.startLicenseManager(ctx); err != nil {
return err return err

View File

@ -845,7 +845,7 @@ func newGRPCHandlerFromConfig(deps Deps, config *Config, s *Server) connHandler
s.externalConnectCAServer.Register(srv) s.externalConnectCAServer.Register(srv)
} }
return agentgrpc.NewHandler(deps.Logger, config.RPCAddr, register) return agentgrpc.NewHandler(deps.Logger, config.RPCAddr, register, nil)
} }
func (s *Server) connectCARootsMonitor(ctx context.Context) { func (s *Server) connectCARootsMonitor(ctx context.Context) {

View File

@ -328,7 +328,7 @@ func newServerWithDeps(t *testing.T, c *Config, deps Deps) (*Server, error) {
oldNotify() oldNotify()
} }
} }
grpcServer := external.NewServer(deps.Logger.Named("grpc.external")) grpcServer := external.NewServer(deps.Logger.Named("grpc.external"), nil)
srv, err := NewServer(c, deps, grpcServer) srv, err := NewServer(c, deps, grpcServer)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -3,6 +3,7 @@ package external
import ( import (
"time" "time"
"github.com/armon/go-metrics"
middleware "github.com/grpc-ecosystem/go-grpc-middleware" middleware "github.com/grpc-ecosystem/go-grpc-middleware"
recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery" recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery"
"google.golang.org/grpc" "google.golang.org/grpc"
@ -11,13 +12,24 @@ import (
agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware" agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware"
) )
var (
metricsLabels = []metrics.Label{{
Name: "server_type",
Value: "external",
}}
)
// NewServer constructs a gRPC server for the external gRPC port, to which // NewServer constructs a gRPC server for the external gRPC port, to which
// handlers can be registered. // handlers can be registered.
func NewServer(logger agentmiddleware.Logger) *grpc.Server { func NewServer(logger agentmiddleware.Logger, metricsObj *metrics.Metrics) *grpc.Server {
if metricsObj == nil {
metricsObj = metrics.Default()
}
recoveryOpts := agentmiddleware.PanicHandlerMiddlewareOpts(logger) recoveryOpts := agentmiddleware.PanicHandlerMiddlewareOpts(logger)
opts := []grpc.ServerOption{ opts := []grpc.ServerOption{
grpc.MaxConcurrentStreams(2048), grpc.MaxConcurrentStreams(2048),
grpc.StatsHandler(agentmiddleware.NewStatsHandler(metricsObj, metricsLabels)),
middleware.WithUnaryServerChain( middleware.WithUnaryServerChain(
// Add middlware interceptors to recover in case of panics. // Add middlware interceptors to recover in case of panics.
recovery.UnaryServerInterceptor(recoveryOpts...), recovery.UnaryServerInterceptor(recoveryOpts...),
@ -25,6 +37,7 @@ func NewServer(logger agentmiddleware.Logger) *grpc.Server {
middleware.WithStreamServerChain( middleware.WithStreamServerChain(
// Add middlware interceptors to recover in case of panics. // Add middlware interceptors to recover in case of panics.
recovery.StreamServerInterceptor(recoveryOpts...), recovery.StreamServerInterceptor(recoveryOpts...),
agentmiddleware.NewActiveStreamCounter(metricsObj, metricsLabels).Intercept,
), ),
grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
// This must be less than the keealive.ClientParameters Time setting, otherwise // This must be less than the keealive.ClientParameters Time setting, otherwise

View File

@ -0,0 +1,104 @@
package external
import (
"context"
"net"
"sort"
"testing"
"github.com/armon/go-metrics"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/agent/grpc-middleware/testutil"
"github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice"
"github.com/hashicorp/consul/proto/prototest"
)
func TestServer_EmitsStats(t *testing.T) {
sink, metricsObj := testutil.NewFakeSink(t)
srv := NewServer(hclog.Default(), metricsObj)
testservice.RegisterSimpleServer(srv, &testservice.Simple{})
lis, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
g, ctx := errgroup.WithContext(ctx)
g.Go(func() error {
return srv.Serve(lis)
})
t.Cleanup(func() {
srv.Stop()
if err := g.Wait(); err != nil {
t.Logf("grpc server error: %v", err)
}
})
conn, err := grpc.DialContext(ctx, lis.Addr().String(), grpc.WithInsecure())
require.NoError(t, err)
t.Cleanup(func() { conn.Close() })
client := testservice.NewSimpleClient(conn)
fClient, err := client.Flow(ctx, &testservice.Req{Datacenter: "mine"})
require.NoError(t, err)
// Wait for the first event so that we know the stream is sending.
_, err = fClient.Recv()
require.NoError(t, err)
cancel()
conn.Close()
srv.GracefulStop()
// Wait for the server to stop so that active_streams is predictable.
require.NoError(t, g.Wait())
// Occasionally the active_stream=0 metric may be emitted before the
// active_conns=0 metric. The order of those metrics is not really important
// so we sort the calls to match the expected.
sort.Slice(sink.GaugeCalls, func(i, j int) bool {
if i < 2 || j < 2 {
return i < j
}
if len(sink.GaugeCalls[i].Key) < 4 || len(sink.GaugeCalls[j].Key) < 4 {
return i < j
}
return sink.GaugeCalls[i].Key[3] < sink.GaugeCalls[j].Key[3]
})
cmpMetricCalls := cmp.AllowUnexported(testutil.MetricCall{})
expLabels := []metrics.Label{{
Name: "server_type",
Value: "external",
}}
expectedGauge := []testutil.MetricCall{
{Key: []string{"testing", "grpc", "server", "connections"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "streams"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "connections"}, Val: 0, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "streams"}, Val: 0, Labels: expLabels},
}
prototest.AssertDeepEqual(t, expectedGauge, sink.GaugeCalls, cmpMetricCalls)
expectedCounter := []testutil.MetricCall{
{Key: []string{"testing", "grpc", "server", "connection", "count"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "request", "count"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "stream", "count"}, Val: 1, Labels: expLabels},
}
prototest.AssertDeepEqual(t, expectedCounter, sink.IncrCounterCalls, cmpMetricCalls)
}
func logError(t *testing.T, f func() error) func() {
return func() {
if err := f(); err != nil {
t.Logf(err.Error())
}
}
}

View File

@ -10,6 +10,8 @@ import (
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/keepalive" "google.golang.org/grpc/keepalive"
"github.com/armon/go-metrics"
agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware"
"github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/tlsutil" "github.com/hashicorp/consul/tlsutil"
@ -129,7 +131,7 @@ func (c *ClientConnPool) dial(datacenter string, serverType string) (*grpc.Clien
grpc.WithInsecure(), grpc.WithInsecure(),
grpc.WithContextDialer(c.dialer), grpc.WithContextDialer(c.dialer),
grpc.WithDisableRetry(), grpc.WithDisableRetry(),
grpc.WithStatsHandler(newStatsHandler(defaultMetrics())), grpc.WithStatsHandler(agentmiddleware.NewStatsHandler(metrics.Default(), metricsLabels)),
// nolint:staticcheck // there is no other supported alternative to WithBalancerName // nolint:staticcheck // there is no other supported alternative to WithBalancerName
grpc.WithBalancerName("pick_first"), grpc.WithBalancerName("pick_first"),
// Keep alive parameters are based on the same default ones we used for // Keep alive parameters are based on the same default ones we used for

View File

@ -14,8 +14,8 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/hashicorp/consul/agent/grpc-internal/internal/testservice"
"github.com/hashicorp/consul/agent/grpc-internal/resolver" "github.com/hashicorp/consul/agent/grpc-internal/resolver"
"github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice"
"github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/ipaddr" "github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/sdk/freeport" "github.com/hashicorp/consul/sdk/freeport"

View File

@ -5,6 +5,7 @@ import (
"net" "net"
"time" "time"
"github.com/armon/go-metrics"
agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware" agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware"
middleware "github.com/grpc-ecosystem/go-grpc-middleware" middleware "github.com/grpc-ecosystem/go-grpc-middleware"
@ -13,18 +14,27 @@ import (
"google.golang.org/grpc/keepalive" "google.golang.org/grpc/keepalive"
) )
var (
metricsLabels = []metrics.Label{{
Name: "server_type",
Value: "internal",
}}
)
// NewHandler returns a gRPC server that accepts connections from Handle(conn). // NewHandler returns a gRPC server that accepts connections from Handle(conn).
// The register function will be called with the grpc.Server to register // The register function will be called with the grpc.Server to register
// gRPC services with the server. // gRPC services with the server.
func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server)) *Handler { func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server), metricsObj *metrics.Metrics) *Handler {
metrics := defaultMetrics() if metricsObj == nil {
metricsObj = metrics.Default()
}
// We don't need to pass tls.Config to the server since it's multiplexed // We don't need to pass tls.Config to the server since it's multiplexed
// behind the RPC listener, which already has TLS configured. // behind the RPC listener, which already has TLS configured.
recoveryOpts := agentmiddleware.PanicHandlerMiddlewareOpts(logger) recoveryOpts := agentmiddleware.PanicHandlerMiddlewareOpts(logger)
opts := []grpc.ServerOption{ opts := []grpc.ServerOption{
grpc.StatsHandler(newStatsHandler(metrics)), grpc.StatsHandler(agentmiddleware.NewStatsHandler(metricsObj, metricsLabels)),
middleware.WithUnaryServerChain( middleware.WithUnaryServerChain(
// Add middlware interceptors to recover in case of panics. // Add middlware interceptors to recover in case of panics.
recovery.UnaryServerInterceptor(recoveryOpts...), recovery.UnaryServerInterceptor(recoveryOpts...),
@ -32,7 +42,7 @@ func NewHandler(logger Logger, addr net.Addr, register func(server *grpc.Server)
middleware.WithStreamServerChain( middleware.WithStreamServerChain(
// Add middlware interceptors to recover in case of panics. // Add middlware interceptors to recover in case of panics.
recovery.StreamServerInterceptor(recoveryOpts...), recovery.StreamServerInterceptor(recoveryOpts...),
(&activeStreamCounter{metrics: metrics}).Intercept, agentmiddleware.NewActiveStreamCounter(metricsObj, metricsLabels).Intercept,
), ),
grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
MinTime: 15 * time.Second, MinTime: 15 * time.Second,

View File

@ -13,8 +13,8 @@ import (
"google.golang.org/grpc/codes" "google.golang.org/grpc/codes"
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
"github.com/hashicorp/consul/agent/grpc-internal/internal/testservice"
"github.com/hashicorp/consul/agent/grpc-internal/resolver" "github.com/hashicorp/consul/agent/grpc-internal/resolver"
"github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice"
) )
func TestHandler_PanicRecoveryInterceptor(t *testing.T) { func TestHandler_PanicRecoveryInterceptor(t *testing.T) {
@ -57,5 +57,6 @@ func TestHandler_PanicRecoveryInterceptor(t *testing.T) {
// Checking the entire stack trace is not possible, let's // Checking the entire stack trace is not possible, let's
// make sure that it contains a couple of expected strings. // make sure that it contains a couple of expected strings.
require.Contains(t, strLog, `[ERROR] panic serving grpc request: panic="panic from Something`) require.Contains(t, strLog, `[ERROR] panic serving grpc request: panic="panic from Something`)
require.Contains(t, strLog, `github.com/hashicorp/consul/agent/grpc-internal.(*simplePanic).Something`) require.Contains(t, strLog, `github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice.(*SimplePanic).Something`)
} }

View File

@ -1,7 +1,6 @@
package internal package internal
import ( import (
"context"
"crypto/tls" "crypto/tls"
"fmt" "fmt"
"io" "io"
@ -15,7 +14,7 @@ import (
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"google.golang.org/grpc" "google.golang.org/grpc"
"github.com/hashicorp/consul/agent/grpc-internal/internal/testservice" "github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice"
"github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/tlsutil" "github.com/hashicorp/consul/tlsutil"
@ -42,20 +41,20 @@ func (s testServer) Metadata() *metadata.Server {
func newSimpleTestServer(t *testing.T, name, dc string, tlsConf *tlsutil.Configurator) testServer { func newSimpleTestServer(t *testing.T, name, dc string, tlsConf *tlsutil.Configurator) testServer {
return newTestServer(t, hclog.Default(), name, dc, tlsConf, func(server *grpc.Server) { return newTestServer(t, hclog.Default(), name, dc, tlsConf, func(server *grpc.Server) {
testservice.RegisterSimpleServer(server, &simple{name: name, dc: dc}) testservice.RegisterSimpleServer(server, &testservice.Simple{Name: name, DC: dc})
}) })
} }
// newPanicTestServer sets up a simple server with handlers that panic. // newPanicTestServer sets up a simple server with handlers that panic.
func newPanicTestServer(t *testing.T, logger hclog.Logger, name, dc string, tlsConf *tlsutil.Configurator) testServer { func newPanicTestServer(t *testing.T, logger hclog.Logger, name, dc string, tlsConf *tlsutil.Configurator) testServer {
return newTestServer(t, logger, name, dc, tlsConf, func(server *grpc.Server) { return newTestServer(t, logger, name, dc, tlsConf, func(server *grpc.Server) {
testservice.RegisterSimpleServer(server, &simplePanic{name: name, dc: dc}) testservice.RegisterSimpleServer(server, &testservice.SimplePanic{Name: name, DC: dc})
}) })
} }
func newTestServer(t *testing.T, logger hclog.Logger, name, dc string, tlsConf *tlsutil.Configurator, register func(server *grpc.Server)) testServer { func newTestServer(t *testing.T, logger hclog.Logger, name, dc string, tlsConf *tlsutil.Configurator, register func(server *grpc.Server)) testServer {
addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")}
handler := NewHandler(logger, addr, register) handler := NewHandler(logger, addr, register, nil)
lis, err := net.Listen("tcp", "127.0.0.1:0") lis, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err) require.NoError(t, err)
@ -95,43 +94,6 @@ func newTestServer(t *testing.T, logger hclog.Logger, name, dc string, tlsConf *
} }
} }
type simple struct {
name string
dc string
}
func (s *simple) Flow(_ *testservice.Req, flow testservice.Simple_FlowServer) error {
for flow.Context().Err() == nil {
resp := &testservice.Resp{ServerName: "one", Datacenter: s.dc}
if err := flow.Send(resp); err != nil {
return err
}
time.Sleep(time.Millisecond)
}
return nil
}
func (s *simple) Something(_ context.Context, _ *testservice.Req) (*testservice.Resp, error) {
return &testservice.Resp{ServerName: s.name, Datacenter: s.dc}, nil
}
type simplePanic struct {
name, dc string
}
func (s *simplePanic) Flow(_ *testservice.Req, flow testservice.Simple_FlowServer) error {
for flow.Context().Err() == nil {
time.Sleep(time.Millisecond)
panic("panic from Flow")
}
return nil
}
func (s *simplePanic) Something(_ context.Context, _ *testservice.Req) (*testservice.Resp, error) {
time.Sleep(time.Millisecond)
panic("panic from Something")
}
// fakeRPCListener mimics agent/consul.Server.listen to handle the RPCType byte. // fakeRPCListener mimics agent/consul.Server.listen to handle the RPCType byte.
// In the future we should be able to refactor Server and extract this RPC // In the future we should be able to refactor Server and extract this RPC
// handling logic so that we don't need to use a fake. // handling logic so that we don't need to use a fake.

View File

@ -370,10 +370,15 @@ var _ Backend = (*testBackend)(nil)
func runTestServer(t *testing.T, server *Server) net.Addr { func runTestServer(t *testing.T, server *Server) net.Addr {
addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")}
var grpcServer *gogrpc.Server var grpcServer *gogrpc.Server
handler := grpc.NewHandler(hclog.New(nil), addr, func(srv *gogrpc.Server) { handler := grpc.NewHandler(
grpcServer = srv hclog.New(nil),
pbsubscribe.RegisterStateChangeSubscriptionServer(srv, server) addr,
}) func(srv *gogrpc.Server) {
grpcServer = srv
pbsubscribe.RegisterStateChangeSubscriptionServer(srv, server)
},
nil,
)
lis, err := net.Listen("tcp", "127.0.0.1:0") lis, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err) require.NoError(t, err)

View File

@ -4,9 +4,7 @@ import (
"context" "context"
"net" "net"
"sort" "sort"
"sync"
"testing" "testing"
"time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
@ -16,20 +14,20 @@ import (
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/agent/grpc-internal/internal/testservice" "github.com/hashicorp/consul/agent/grpc-middleware/testutil"
"github.com/hashicorp/consul/agent/grpc-middleware/testutil/testservice"
"github.com/hashicorp/consul/proto/prototest" "github.com/hashicorp/consul/proto/prototest"
) )
func noopRegister(*grpc.Server) {} func noopRegister(*grpc.Server) {}
func TestHandler_EmitsStats(t *testing.T) { func TestHandler_EmitsStats(t *testing.T) {
sink, reset := patchGlobalMetrics(t) sink, metricsObj := testutil.NewFakeSink(t)
addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")} addr := &net.IPAddr{IP: net.ParseIP("127.0.0.1")}
handler := NewHandler(hclog.Default(), addr, noopRegister) handler := NewHandler(hclog.Default(), addr, noopRegister, metricsObj)
reset()
testservice.RegisterSimpleServer(handler.srv, &simple{}) testservice.RegisterSimpleServer(handler.srv, &testservice.Simple{})
lis, err := net.Listen("tcp", "127.0.0.1:0") lis, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err) require.NoError(t, err)
@ -71,80 +69,35 @@ func TestHandler_EmitsStats(t *testing.T) {
// Occasionally the active_stream=0 metric may be emitted before the // Occasionally the active_stream=0 metric may be emitted before the
// active_conns=0 metric. The order of those metrics is not really important // active_conns=0 metric. The order of those metrics is not really important
// so we sort the calls to match the expected. // so we sort the calls to match the expected.
sort.Slice(sink.gaugeCalls, func(i, j int) bool { sort.Slice(sink.GaugeCalls, func(i, j int) bool {
if i < 2 || j < 2 { if i < 2 || j < 2 {
return i < j return i < j
} }
if len(sink.gaugeCalls[i].key) < 4 || len(sink.gaugeCalls[j].key) < 4 { if len(sink.GaugeCalls[i].Key) < 4 || len(sink.GaugeCalls[j].Key) < 4 {
return i < j return i < j
} }
return sink.gaugeCalls[i].key[3] < sink.gaugeCalls[j].key[3] return sink.GaugeCalls[i].Key[3] < sink.GaugeCalls[j].Key[3]
}) })
cmpMetricCalls := cmp.AllowUnexported(metricCall{}) cmpMetricCalls := cmp.AllowUnexported(testutil.MetricCall{})
expectedGauge := []metricCall{ expLabels := []metrics.Label{{
{key: []string{"testing", "grpc", "server", "connections"}, val: 1}, Name: "server_type",
{key: []string{"testing", "grpc", "server", "streams"}, val: 1}, Value: "internal",
{key: []string{"testing", "grpc", "server", "connections"}, val: 0}, }}
{key: []string{"testing", "grpc", "server", "streams"}, val: 0}, expectedGauge := []testutil.MetricCall{
{Key: []string{"testing", "grpc", "server", "connections"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "streams"}, Val: 1, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "connections"}, Val: 0, Labels: expLabels},
{Key: []string{"testing", "grpc", "server", "streams"}, Val: 0, Labels: expLabels},
} }
prototest.AssertDeepEqual(t, expectedGauge, sink.gaugeCalls, cmpMetricCalls) prototest.AssertDeepEqual(t, expectedGauge, sink.GaugeCalls, cmpMetricCalls)
expectedCounter := []metricCall{ expectedCounter := []testutil.MetricCall{
{key: []string{"testing", "grpc", "server", "connection", "count"}, val: 1}, {Key: []string{"testing", "grpc", "server", "connection", "count"}, Val: 1, Labels: expLabels},
{key: []string{"testing", "grpc", "server", "request", "count"}, val: 1}, {Key: []string{"testing", "grpc", "server", "request", "count"}, Val: 1, Labels: expLabels},
{key: []string{"testing", "grpc", "server", "stream", "count"}, val: 1}, {Key: []string{"testing", "grpc", "server", "stream", "count"}, Val: 1, Labels: expLabels},
} }
prototest.AssertDeepEqual(t, expectedCounter, sink.incrCounterCalls, cmpMetricCalls) prototest.AssertDeepEqual(t, expectedCounter, sink.IncrCounterCalls, cmpMetricCalls)
}
func patchGlobalMetrics(t *testing.T) (*fakeMetricsSink, func()) {
t.Helper()
sink := &fakeMetricsSink{}
cfg := &metrics.Config{
ServiceName: "testing",
TimerGranularity: time.Millisecond, // Timers are in milliseconds
ProfileInterval: time.Second, // Poll runtime every second
FilterDefault: true,
}
var err error
defaultMetrics = func() *metrics.Metrics {
m, _ := metrics.New(cfg, sink)
return m
}
require.NoError(t, err)
reset := func() {
t.Helper()
defaultMetrics = metrics.Default
require.NoError(t, err, "failed to reset global metrics")
}
return sink, reset
}
type fakeMetricsSink struct {
lock sync.Mutex
metrics.BlackholeSink
gaugeCalls []metricCall
incrCounterCalls []metricCall
}
func (f *fakeMetricsSink) SetGaugeWithLabels(key []string, val float32, labels []metrics.Label) {
f.lock.Lock()
f.gaugeCalls = append(f.gaugeCalls, metricCall{key: key, val: val, labels: labels})
f.lock.Unlock()
}
func (f *fakeMetricsSink) IncrCounterWithLabels(key []string, val float32, labels []metrics.Label) {
f.lock.Lock()
f.incrCounterCalls = append(f.incrCounterCalls, metricCall{key: key, val: val, labels: labels})
f.lock.Unlock()
}
type metricCall struct {
key []string
val float32
labels []metrics.Label
} }
func logError(t *testing.T, f func() error) func() { func logError(t *testing.T, f func() error) func() {

View File

@ -1,4 +1,4 @@
package internal package middleware
import ( import (
"context" "context"
@ -47,8 +47,6 @@ var StatsCounters = []prometheus.CounterDefinition{
}, },
} }
var defaultMetrics = metrics.Default
// statsHandler is a grpc/stats.StatsHandler which emits connection and // statsHandler is a grpc/stats.StatsHandler which emits connection and
// request metrics to go-metrics. // request metrics to go-metrics.
type statsHandler struct { type statsHandler struct {
@ -57,10 +55,11 @@ type statsHandler struct {
// the struct. See https://golang.org/pkg/sync/atomic/#pkg-note-BUG. // the struct. See https://golang.org/pkg/sync/atomic/#pkg-note-BUG.
activeConns uint64 activeConns uint64
metrics *metrics.Metrics metrics *metrics.Metrics
labels []metrics.Label
} }
func newStatsHandler(m *metrics.Metrics) *statsHandler { func NewStatsHandler(m *metrics.Metrics, labels []metrics.Label) *statsHandler {
return &statsHandler{metrics: m} return &statsHandler{metrics: m, labels: labels}
} }
// TagRPC implements grpcStats.StatsHandler // TagRPC implements grpcStats.StatsHandler
@ -77,7 +76,7 @@ func (c *statsHandler) HandleRPC(_ context.Context, s stats.RPCStats) {
} }
switch s.(type) { switch s.(type) {
case *stats.InHeader: case *stats.InHeader:
c.metrics.IncrCounter([]string{"grpc", label, "request", "count"}, 1) c.metrics.IncrCounterWithLabels([]string{"grpc", label, "request", "count"}, 1, c.labels)
} }
} }
@ -97,12 +96,12 @@ func (c *statsHandler) HandleConn(_ context.Context, s stats.ConnStats) {
switch s.(type) { switch s.(type) {
case *stats.ConnBegin: case *stats.ConnBegin:
count = atomic.AddUint64(&c.activeConns, 1) count = atomic.AddUint64(&c.activeConns, 1)
c.metrics.IncrCounter([]string{"grpc", label, "connection", "count"}, 1) c.metrics.IncrCounterWithLabels([]string{"grpc", label, "connection", "count"}, 1, c.labels)
case *stats.ConnEnd: case *stats.ConnEnd:
// Decrement! // Decrement!
count = atomic.AddUint64(&c.activeConns, ^uint64(0)) count = atomic.AddUint64(&c.activeConns, ^uint64(0))
} }
c.metrics.SetGauge([]string{"grpc", label, "connections"}, float32(count)) c.metrics.SetGaugeWithLabels([]string{"grpc", label, "connections"}, float32(count), c.labels)
} }
type activeStreamCounter struct { type activeStreamCounter struct {
@ -111,6 +110,11 @@ type activeStreamCounter struct {
// the struct. See https://golang.org/pkg/sync/atomic/#pkg-note-BUG. // the struct. See https://golang.org/pkg/sync/atomic/#pkg-note-BUG.
count uint64 count uint64
metrics *metrics.Metrics metrics *metrics.Metrics
labels []metrics.Label
}
func NewActiveStreamCounter(m *metrics.Metrics, labels []metrics.Label) *activeStreamCounter {
return &activeStreamCounter{metrics: m, labels: labels}
} }
// GRPCCountingStreamInterceptor is a grpc.ServerStreamInterceptor that emits a // GRPCCountingStreamInterceptor is a grpc.ServerStreamInterceptor that emits a
@ -122,11 +126,11 @@ func (i *activeStreamCounter) Intercept(
handler grpc.StreamHandler, handler grpc.StreamHandler,
) error { ) error {
count := atomic.AddUint64(&i.count, 1) count := atomic.AddUint64(&i.count, 1)
i.metrics.SetGauge([]string{"grpc", "server", "streams"}, float32(count)) i.metrics.SetGaugeWithLabels([]string{"grpc", "server", "streams"}, float32(count), i.labels)
i.metrics.IncrCounter([]string{"grpc", "server", "stream", "count"}, 1) i.metrics.IncrCounterWithLabels([]string{"grpc", "server", "stream", "count"}, 1, i.labels)
defer func() { defer func() {
count := atomic.AddUint64(&i.count, ^uint64(0)) count := atomic.AddUint64(&i.count, ^uint64(0))
i.metrics.SetGauge([]string{"grpc", "server", "streams"}, float32(count)) i.metrics.SetGaugeWithLabels([]string{"grpc", "server", "streams"}, float32(count), i.labels)
}() }()
return handler(srv, ss) return handler(srv, ss)

View File

@ -0,0 +1,50 @@
package testutil
import (
"sync"
"testing"
"time"
"github.com/armon/go-metrics"
"github.com/stretchr/testify/require"
)
func NewFakeSink(t *testing.T) (*FakeMetricsSink, *metrics.Metrics) {
t.Helper()
sink := &FakeMetricsSink{}
cfg := &metrics.Config{
ServiceName: "testing",
TimerGranularity: time.Millisecond, // Timers are in milliseconds
ProfileInterval: time.Second, // Poll runtime every second
FilterDefault: true,
}
m, err := metrics.New(cfg, sink)
require.NoError(t, err)
return sink, m
}
type FakeMetricsSink struct {
lock sync.Mutex
metrics.BlackholeSink
GaugeCalls []MetricCall
IncrCounterCalls []MetricCall
}
func (f *FakeMetricsSink) SetGaugeWithLabels(key []string, val float32, labels []metrics.Label) {
f.lock.Lock()
f.GaugeCalls = append(f.GaugeCalls, MetricCall{Key: key, Val: val, Labels: labels})
f.lock.Unlock()
}
func (f *FakeMetricsSink) IncrCounterWithLabels(key []string, val float32, labels []metrics.Label) {
f.lock.Lock()
f.IncrCounterCalls = append(f.IncrCounterCalls, MetricCall{Key: key, Val: val, Labels: labels})
f.lock.Unlock()
}
type MetricCall struct {
Key []string
Val float32
Labels []metrics.Label
}

View File

@ -0,0 +1,43 @@
package testservice
import (
"context"
"time"
)
type Simple struct {
Name string
DC string
}
func (s *Simple) Flow(_ *Req, flow Simple_FlowServer) error {
for flow.Context().Err() == nil {
resp := &Resp{ServerName: "one", Datacenter: s.DC}
if err := flow.Send(resp); err != nil {
return err
}
time.Sleep(time.Millisecond)
}
return nil
}
func (s *Simple) Something(_ context.Context, _ *Req) (*Resp, error) {
return &Resp{ServerName: s.Name, Datacenter: s.DC}, nil
}
type SimplePanic struct {
Name, DC string
}
func (s *SimplePanic) Flow(_ *Req, flow Simple_FlowServer) error {
for flow.Context().Err() == nil {
time.Sleep(time.Millisecond)
panic("panic from Flow")
}
return nil
}
func (s *SimplePanic) Something(_ context.Context, _ *Req) (*Resp, error) {
time.Sleep(time.Millisecond)
panic("panic from Something")
}

View File

@ -1,5 +1,5 @@
// Code generated by protoc-gen-go-binary. DO NOT EDIT. // Code generated by protoc-gen-go-binary. DO NOT EDIT.
// source: agent/grpc-internal/internal/testservice/simple.proto // source: agent/grpc-middleware/testutil/testservice/simple.pb.binary.go
package testservice package testservice

View File

@ -2,7 +2,7 @@
// versions: // versions:
// protoc-gen-go v1.23.0 // protoc-gen-go v1.23.0
// protoc v3.15.8 // protoc v3.15.8
// source: agent/grpc-internal/internal/testservice/simple.proto // source: agent/grpc-middleware/testutil/testservice/simple.proto
package testservice package testservice

View File

@ -1414,7 +1414,7 @@ func newTestServer(t *testing.T, cb func(conf *consul.Config)) testingServer {
conf.ACLResolverSettings.EnterpriseMeta = *conf.AgentEnterpriseMeta() conf.ACLResolverSettings.EnterpriseMeta = *conf.AgentEnterpriseMeta()
deps := newDefaultDeps(t, conf) deps := newDefaultDeps(t, conf)
externalGRPCServer := external.NewServer(deps.Logger) externalGRPCServer := external.NewServer(deps.Logger, nil)
server, err := consul.NewServer(conf, deps, externalGRPCServer) server, err := consul.NewServer(conf, deps, externalGRPCServer)
require.NoError(t, err) require.NoError(t, err)

View File

@ -21,8 +21,9 @@ import (
"github.com/hashicorp/consul/agent/consul/usagemetrics" "github.com/hashicorp/consul/agent/consul/usagemetrics"
"github.com/hashicorp/consul/agent/consul/xdscapacity" "github.com/hashicorp/consul/agent/consul/xdscapacity"
"github.com/hashicorp/consul/agent/grpc-external/limiter" "github.com/hashicorp/consul/agent/grpc-external/limiter"
grpc "github.com/hashicorp/consul/agent/grpc-internal" grpcInt "github.com/hashicorp/consul/agent/grpc-internal"
"github.com/hashicorp/consul/agent/grpc-internal/resolver" "github.com/hashicorp/consul/agent/grpc-internal/resolver"
grpcWare "github.com/hashicorp/consul/agent/grpc-middleware"
"github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/agent/router" "github.com/hashicorp/consul/agent/router"
@ -112,11 +113,11 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error)
Authority: cfg.Datacenter + "." + string(cfg.NodeID), Authority: cfg.Datacenter + "." + string(cfg.NodeID),
}) })
resolver.Register(builder) resolver.Register(builder)
d.GRPCConnPool = grpc.NewClientConnPool(grpc.ClientConnPoolConfig{ d.GRPCConnPool = grpcInt.NewClientConnPool(grpcInt.ClientConnPoolConfig{
Servers: builder, Servers: builder,
SrcAddr: d.ConnPool.SrcAddr, SrcAddr: d.ConnPool.SrcAddr,
TLSWrapper: grpc.TLSWrapper(d.TLSConfigurator.OutgoingRPCWrapper()), TLSWrapper: grpcInt.TLSWrapper(d.TLSConfigurator.OutgoingRPCWrapper()),
ALPNWrapper: grpc.ALPNWrapper(d.TLSConfigurator.OutgoingALPNRPCWrapper()), ALPNWrapper: grpcInt.ALPNWrapper(d.TLSConfigurator.OutgoingALPNRPCWrapper()),
UseTLSForDC: d.TLSConfigurator.UseTLS, UseTLSForDC: d.TLSConfigurator.UseTLS,
DialingFromServer: cfg.ServerMode, DialingFromServer: cfg.ServerMode,
DialingFromDatacenter: cfg.Datacenter, DialingFromDatacenter: cfg.Datacenter,
@ -201,7 +202,8 @@ func newConnPool(config *config.RuntimeConfig, logger hclog.Logger, tls *tlsutil
} }
// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends // getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics. //
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics.
func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) { func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) {
// TODO: "raft..." metrics come from the raft lib and we should migrate these to a telemetry // TODO: "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul. // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
@ -228,7 +230,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
cache.Gauges, cache.Gauges,
consul.RPCGauges, consul.RPCGauges,
consul.SessionGauges, consul.SessionGauges,
grpc.StatsGauges, grpcWare.StatsGauges,
xds.StatsGauges, xds.StatsGauges,
usagemetrics.Gauges, usagemetrics.Gauges,
consul.ReplicationGauges, consul.ReplicationGauges,
@ -286,7 +288,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
consul.CatalogCounters, consul.CatalogCounters,
consul.ClientCounters, consul.ClientCounters,
consul.RPCCounters, consul.RPCCounters,
grpc.StatsCounters, grpcWare.StatsCounters,
local.StateCounters, local.StateCounters,
xds.StatsCounters, xds.StatsCounters,
raftCounters, raftCounters,