From 3ce52772176d225fadb771597cf3fd9fd75631ee Mon Sep 17 00:00:00 2001
From: Derek Menteer <105233703+hashi-derek@users.noreply.github.com>
Date: Tue, 9 May 2023 12:37:58 -0500
Subject: [PATCH] Fix multiple issues related to proxycfg health queries.
 (#17241)

Fix multiple issues related to proxycfg health queries.

1. The datacenter was not being provided to a proxycfg query, which
   resulted in bypassing agentless query optimizations and using the
   normal API instead.

2. The health rpc endpoint would return a zero index when insufficient
   ACLs were detected. This would result in the agent cache performing
   an infinite loop of queries in rapid succession without backoff.
---
 .changelog/17241.txt                 |  3 +++
 agent/consul/health_endpoint.go      | 37 ++++++++++++++--------------
 agent/consul/health_endpoint_test.go |  1 +
 agent/proxycfg/state_test.go         |  2 +-
 agent/proxycfg/upstreams.go          | 24 +++++++++---------
 5 files changed, 37 insertions(+), 30 deletions(-)
 create mode 100644 .changelog/17241.txt

diff --git a/.changelog/17241.txt b/.changelog/17241.txt
new file mode 100644
index 000000000..036971092
--- /dev/null
+++ b/.changelog/17241.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+connect: Fix multiple inefficient behaviors when querying service health.
+```
diff --git a/agent/consul/health_endpoint.go b/agent/consul/health_endpoint.go
index abf17adba..6913136d3 100644
--- a/agent/consul/health_endpoint.go
+++ b/agent/consul/health_endpoint.go
@@ -214,28 +214,10 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc
 		f = h.serviceNodesDefault
 	}
 
-	authzContext := acl.AuthorizerContext{
-		Peer: args.PeerName,
-	}
-	authz, err := h.srv.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext)
-	if err != nil {
-		return err
-	}
-
 	if err := h.srv.validateEnterpriseRequest(&args.EnterpriseMeta, false); err != nil {
 		return err
 	}
 
-	// If we're doing a connect or ingress query, we need read access to the service
-	// we're trying to find proxies for, so check that.
-	if args.Connect || args.Ingress {
-		// TODO(acl-error-enhancements) Look for ways to percolate this information up to give any feedback to the user.
-		if authz.ServiceRead(args.ServiceName, &authzContext) != acl.Allow {
-			// Just return nil, which will return an empty response (tested)
-			return nil
-		}
-	}
-
 	filter, err := bexpr.CreateFilter(args.Filter, nil, reply.Nodes)
 	if err != nil {
 		return err
@@ -257,6 +239,25 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc
 				return err
 			}
 
+			authzContext := acl.AuthorizerContext{
+				Peer: args.PeerName,
+			}
+			authz, err := h.srv.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext)
+			if err != nil {
+				return err
+			}
+
+			// If we're doing a connect or ingress query, we need read access to the service
+			// we're trying to find proxies for, so check that.
+			if args.Connect || args.Ingress {
+				// TODO(acl-error-enhancements) Look for ways to percolate this information up to give any feedback to the user.
+				if authz.ServiceRead(args.ServiceName, &authzContext) != acl.Allow {
+					// Return the index here so that the agent cache does not infinitely loop.
+					reply.Index = index
+					return nil
+				}
+			}
+
 			resolvedNodes := nodes
 			if args.MergeCentralConfig {
 				for _, node := range resolvedNodes {
diff --git a/agent/consul/health_endpoint_test.go b/agent/consul/health_endpoint_test.go
index 4b492081c..1981332d8 100644
--- a/agent/consul/health_endpoint_test.go
+++ b/agent/consul/health_endpoint_test.go
@@ -1127,6 +1127,7 @@ node "foo" {
 	var resp structs.IndexedCheckServiceNodes
 	assert.Nil(t, msgpackrpc.CallWithCodec(codec, "Health.ServiceNodes", &req, &resp))
 	assert.Len(t, resp.Nodes, 0)
+	assert.Greater(t, resp.Index, uint64(0))
 
 	// List w/ token. This should work since we're requesting "foo", but should
 	// also only contain the proxies with names that adhere to our ACL.
diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go
index aa11dc43f..792367b47 100644
--- a/agent/proxycfg/state_test.go
+++ b/agent/proxycfg/state_test.go
@@ -795,7 +795,7 @@ func TestState_WatchesAndUpdates(t *testing.T) {
 					fmt.Sprintf("upstream-target:api-failover-remote.default.default.dc2:%s-failover-remote?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-remote", "", "dc2", true),
 					fmt.Sprintf("upstream-target:api-failover-local.default.default.dc2:%s-failover-local?dc=dc2", apiUID.String()):   genVerifyServiceSpecificRequest("api-failover-local", "", "dc2", true),
 					fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true),
-					upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()):                   genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "", "cluster-01", true),
+					upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()):                   genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "dc1", "cluster-01", true),
 					fmt.Sprintf("mesh-gateway:dc2:%s-failover-remote?dc=dc2", apiUID.String()):                                        genVerifyGatewayWatch("dc2"),
 					fmt.Sprintf("mesh-gateway:dc1:%s-failover-local?dc=dc2", apiUID.String()):                                         genVerifyGatewayWatch("dc1"),
 				},
diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go
index d7fee4a3f..5e42072fa 100644
--- a/agent/proxycfg/upstreams.go
+++ b/agent/proxycfg/upstreams.go
@@ -316,8 +316,19 @@ func (s *handlerUpstreams) resetWatchesFromChain(
 			watchedChainEndpoints = true
 		}
 
-		opts := targetWatchOpts{upstreamID: uid}
-		opts.fromChainTarget(target)
+		opts := targetWatchOpts{
+			upstreamID: uid,
+			chainID:    target.ID,
+			service:    target.Service,
+			filter:     target.Subset.Filter,
+			datacenter: target.Datacenter,
+			peer:       target.Peer,
+			entMeta:    target.GetEnterpriseMetadata(),
+		}
+		// Peering targets do not set the datacenter field, so we should default it here.
+		if opts.datacenter == "" {
+			opts.datacenter = s.source.Datacenter
+		}
 
 		err := s.watchUpstreamTarget(ctx, snap, opts)
 		if err != nil {
@@ -435,15 +446,6 @@ type targetWatchOpts struct {
 	entMeta    *acl.EnterpriseMeta
 }
 
-func (o *targetWatchOpts) fromChainTarget(t *structs.DiscoveryTarget) {
-	o.chainID = t.ID
-	o.service = t.Service
-	o.filter = t.Subset.Filter
-	o.datacenter = t.Datacenter
-	o.peer = t.Peer
-	o.entMeta = t.GetEnterpriseMetadata()
-}
-
 func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *ConfigSnapshotUpstreams, opts targetWatchOpts) error {
 	s.logger.Trace("initializing watch of target",
 		"upstream", opts.upstreamID,
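A note on the first issue: peered discovery-chain targets leave their datacenter field empty, so unless the watch options fall back to the agent's own datacenter, the health query goes out without one and misses the agentless query optimizations. Below is a minimal standalone sketch of that defaulting pattern; the types and names (discoveryTarget, watchOpts, buildWatchOpts) are invented for illustration and are not the proxycfg package's API:

```go
package main

import "fmt"

// discoveryTarget stands in for a discovery-chain target; peering targets
// carry a peer name but leave the datacenter empty.
type discoveryTarget struct {
	service    string
	datacenter string
	peer       string
}

// watchOpts stands in for the options handed to the upstream health watch.
type watchOpts struct {
	service    string
	datacenter string
	peer       string
}

// buildWatchOpts copies the target fields and defaults the datacenter to the
// source (local) datacenter, mirroring the shape of the upstreams.go fix: an
// empty datacenter would otherwise route the query down the unoptimized path.
func buildWatchOpts(t discoveryTarget, sourceDC string) watchOpts {
	opts := watchOpts{
		service:    t.service,
		datacenter: t.datacenter,
		peer:       t.peer,
	}
	if opts.datacenter == "" {
		opts.datacenter = sourceDC
	}
	return opts
}

func main() {
	// A peered target with no datacenter set picks up the local one.
	peered := discoveryTarget{service: "api", peer: "cluster-01"}
	fmt.Printf("%+v\n", buildWatchOpts(peered, "dc1"))
	// Output: {service:api datacenter:dc1 peer:cluster-01}
}
```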
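A note on the second issue: the agent cache drives its blocking queries off the index each RPC returns, and it only settles into a blocking wait once that index advances. The sketch below is a simplified model, not Consul's actual agent/cache code (result, fetchPreFix, fetchPostFix, and watch are invented names); it shows why an endpoint that reports index 0 on an ACL denial turns the watch into a tight re-query loop, while echoing the store's current index lets the next query block:

```go
package main

import (
	"fmt"
	"time"
)

// result mimics the index metadata a blocking query returns.
type result struct {
	index uint64
}

// fetchPreFix simulates the buggy endpoint: on insufficient ACLs it returned
// an empty result with index 0, regardless of the store's real index.
func fetchPreFix(minIndex uint64) result {
	time.Sleep(5 * time.Millisecond) // stand-in for the RPC round-trip
	return result{index: 0}
}

// fetchPostFix simulates the fixed endpoint: it still returns no nodes, but
// echoes the store's current index so the caller has something to block on.
func fetchPostFix(minIndex uint64) result {
	time.Sleep(5 * time.Millisecond)
	return result{index: 42}
}

// watch is a simplified agent-cache style loop: it only treats the watch as
// settled once the returned index advances past the last one seen. An index
// of 0 can never advance, so the pre-fix endpoint makes this loop spin as
// fast as the RPCs complete, with no backoff.
func watch(fetch func(uint64) result, rounds int) {
	var lastIndex uint64
	for i := 0; i < rounds; i++ {
		res := fetch(lastIndex)
		if res.index <= lastIndex {
			fmt.Printf("round %d: index %d did not advance; re-querying immediately\n", i, res.index)
			continue
		}
		lastIndex = res.index
		fmt.Printf("round %d: index advanced to %d; next query blocks server-side\n", i, lastIndex)
	}
}

func main() {
	fmt.Println("pre-fix behavior:")
	watch(fetchPreFix, 3)
	fmt.Println("post-fix behavior:")
	watch(fetchPostFix, 3)
}
```

This is why the patch both sets reply.Index before the early return in health_endpoint.go and asserts resp.Index > 0 in the accompanying test.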