From bed8716e442d2e2d14b587ae10990361328a7ea8 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 26 Jan 2023 15:05:51 -0500 Subject: [PATCH] metrics: Add metrics to unauthenticated endpoints (#15899) --- nomad/regions_endpoint.go | 5 +++++ nomad/stats_fetcher.go | 2 +- nomad/status_endpoint.go | 25 +++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/nomad/regions_endpoint.go b/nomad/regions_endpoint.go index bd6db9723..488dd1bd9 100644 --- a/nomad/regions_endpoint.go +++ b/nomad/regions_endpoint.go @@ -21,6 +21,11 @@ func NewRegionEndpoint(srv *Server, ctx *RPCContext) *Region { // required for this endpoint because memberlist is used to populate the // peers list we read from. func (r *Region) List(args *structs.GenericRequest, reply *[]string) error { + // note: we're intentionally throwing away any auth error here and only + // authenticate so that we can measure rate metrics + r.srv.Authenticate(r.ctx, args) + r.srv.MeasureRPCRate("region", structs.RateMetricList, args) + *reply = r.srv.Regions() return nil } diff --git a/nomad/stats_fetcher.go b/nomad/stats_fetcher.go index d06bffba5..c01e98c86 100644 --- a/nomad/stats_fetcher.go +++ b/nomad/stats_fetcher.go @@ -43,7 +43,7 @@ func NewStatsFetcher(logger log.Logger, pool *pool.ConnPool, region string) *Sta // RPC to each server, so we let it finish and then clean up the in-flight // tracking. func (f *StatsFetcher) fetch(server *autopilot.Server, replyCh chan *autopilot.ServerStats) { - var args struct{} + var args structs.GenericRequest var reply structs.RaftStats // defer some cleanup to notify everything else that the fetching is no longer occurring diff --git a/nomad/status_endpoint.go b/nomad/status_endpoint.go index 4b2edf2b8..f5b53a5c9 100644 --- a/nomad/status_endpoint.go +++ b/nomad/status_endpoint.go @@ -26,11 +26,17 @@ func (s *Status) Ping(args structs.GenericRequest, reply *struct{}) error { // note: we're intentionally throwing away any auth error here and only // authenticate so that we can measure rate metrics s.srv.Authenticate(s.ctx, &args) + s.srv.MeasureRPCRate("status", structs.RateMetricRead, &args) return nil } // Leader is used to get the address of the leader func (s *Status) Leader(args *structs.GenericRequest, reply *string) error { + // note: we're intentionally throwing away any auth error here and only + // authenticate so that we can measure rate metrics + s.srv.Authenticate(s.ctx, args) + s.srv.MeasureRPCRate("status", structs.RateMetricRead, args) + if args.Region == "" { args.Region = s.srv.config.Region } @@ -49,6 +55,11 @@ func (s *Status) Leader(args *structs.GenericRequest, reply *string) error { // Peers is used to get all the Raft peers func (s *Status) Peers(args *structs.GenericRequest, reply *[]string) error { + // note: we're intentionally throwing away any auth error here and only + // authenticate so that we can measure rate metrics + s.srv.Authenticate(s.ctx, args) + s.srv.MeasureRPCRate("status", structs.RateMetricList, args) + if args.Region == "" { args.Region = s.srv.config.Region } @@ -71,7 +82,7 @@ func (s *Status) Peers(args *structs.GenericRequest, reply *[]string) error { // aware of func (s *Status) Members(args *structs.GenericRequest, reply *structs.ServerMembersResponse) error { authErr := s.srv.Authenticate(s.ctx, args) - s.srv.MeasureRPCRate("status", structs.RateMetricRead, args) + s.srv.MeasureRPCRate("status", structs.RateMetricList, args) if authErr != nil { return structs.ErrPermissionDenied } @@ -109,7 +120,12 @@ func (s *Status) Members(args *structs.GenericRequest, reply *structs.ServerMemb } // RaftStats is used by Autopilot to query the raft stats of the local server. -func (s *Status) RaftStats(args struct{}, reply *structs.RaftStats) error { +func (s *Status) RaftStats(args *structs.GenericRequest, reply *structs.RaftStats) error { + // note: we're intentionally throwing away any auth error here and only + // authenticate so that we can measure rate metrics + s.srv.Authenticate(s.ctx, args) + s.srv.MeasureRPCRate("status", structs.RateMetricRead, args) + stats := s.srv.raft.Stats() var err error @@ -129,6 +145,11 @@ func (s *Status) RaftStats(args struct{}, reply *structs.RaftStats) error { // HasNodeConn returns whether the server has a connection to the requested // Node. func (s *Status) HasNodeConn(args *structs.NodeSpecificRequest, reply *structs.NodeConnQueryResponse) error { + // note: we're intentionally throwing away any auth error here and only + // authenticate so that we can measure rate metrics + s.srv.Authenticate(s.ctx, args) + s.srv.MeasureRPCRate("status", structs.RateMetricRead, args) + // Validate the args if args.NodeID == "" { return errors.New("Must provide the NodeID")