Fix multiple issues related to proxycfg health queries. (#17241)

1. The datacenter was not being provided to the proxycfg health query, which bypassed the agentless query optimizations and fell back to the normal API instead.
2. The health RPC endpoint would return a zero index when insufficient ACL permissions were detected. This caused the agent cache to re-issue the query in a rapid, infinite loop with no backoff.
parent 91f76b6fb2
commit 3ce5277217
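To make the second issue concrete: Consul's blocking queries hand the caller an index that is passed back as the minimum index on the next request, and the server only waits when its current index has not moved past that minimum. The sketch below is a self-contained toy (the types and functions are illustrative stand-ins, not Consul's actual agent cache or RPC code) showing how a reply that reports index 0 keeps that minimum at 0 and turns the watch into a tight polling loop, while propagating the real index restores the blocking behavior.

```go
package main

import (
	"fmt"
	"time"
)

// result mimics the shape of a blocking-query reply; only the index matters here.
type result struct {
	index uint64
}

// query stands in for the server side of a blocking query: it waits only when
// the caller's minIndex has caught up with the server's current index.
func query(serverIndex, minIndex uint64, zeroIndexOnDeny bool) result {
	if minIndex >= serverIndex {
		time.Sleep(20 * time.Millisecond) // simulate blocking until something changes
	}
	if zeroIndexOnDeny {
		return result{index: 0} // the bug: an ACL-denied reply reported no index
	}
	return result{index: serverIndex} // the fix: always report the query index
}

// watch mimics the agent cache: it loops, feeding each reply's index back in
// as the next request's minimum index, and counts how many calls it makes.
func watch(zeroIndexOnDeny bool) int {
	var minIndex uint64
	calls := 0
	deadline := time.Now().Add(100 * time.Millisecond)
	for time.Now().Before(deadline) {
		res := query(10, minIndex, zeroIndexOnDeny)
		minIndex = res.index
		calls++
	}
	return calls
}

func main() {
	fmt.Println("calls with zero index:  ", watch(true))  // thousands of calls: no backoff
	fmt.Println("calls with a real index:", watch(false)) // a handful: each call blocks
}
```

The Health.ServiceNodes hunk below applies exactly that remedy by setting reply.Index = index before returning on an ACL denial.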
@@ -0,0 +1,3 @@
+```release-note:bug
+connect: Fix multiple inefficient behaviors when querying service health.
+```
@@ -214,28 +214,10 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc
         f = h.serviceNodesDefault
     }
 
-    authzContext := acl.AuthorizerContext{
-        Peer: args.PeerName,
-    }
-    authz, err := h.srv.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext)
-    if err != nil {
-        return err
-    }
-
     if err := h.srv.validateEnterpriseRequest(&args.EnterpriseMeta, false); err != nil {
         return err
     }
 
-    // If we're doing a connect or ingress query, we need read access to the service
-    // we're trying to find proxies for, so check that.
-    if args.Connect || args.Ingress {
-        // TODO(acl-error-enhancements) Look for ways to percolate this information up to give any feedback to the user.
-        if authz.ServiceRead(args.ServiceName, &authzContext) != acl.Allow {
-            // Just return nil, which will return an empty response (tested)
-            return nil
-        }
-    }
-
     filter, err := bexpr.CreateFilter(args.Filter, nil, reply.Nodes)
     if err != nil {
         return err
@@ -257,6 +239,25 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc
                 return err
             }
 
+            authzContext := acl.AuthorizerContext{
+                Peer: args.PeerName,
+            }
+            authz, err := h.srv.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext)
+            if err != nil {
+                return err
+            }
+
+            // If we're doing a connect or ingress query, we need read access to the service
+            // we're trying to find proxies for, so check that.
+            if args.Connect || args.Ingress {
+                // TODO(acl-error-enhancements) Look for ways to percolate this information up to give any feedback to the user.
+                if authz.ServiceRead(args.ServiceName, &authzContext) != acl.Allow {
+                    // Return the index here so that the agent cache does not infinitely loop.
+                    reply.Index = index
+                    return nil
+                }
+            }
+
             resolvedNodes := nodes
             if args.MergeCentralConfig {
                 for _, node := range resolvedNodes {
@@ -1127,6 +1127,7 @@ node "foo" {
     var resp structs.IndexedCheckServiceNodes
     assert.Nil(t, msgpackrpc.CallWithCodec(codec, "Health.ServiceNodes", &req, &resp))
     assert.Len(t, resp.Nodes, 0)
+    assert.Greater(t, resp.Index, uint64(0))
 
     // List w/ token. This should work since we're requesting "foo", but should
     // also only contain the proxies with names that adhere to our ACL.
@@ -795,7 +795,7 @@ func TestState_WatchesAndUpdates(t *testing.T) {
                 fmt.Sprintf("upstream-target:api-failover-remote.default.default.dc2:%s-failover-remote?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-remote", "", "dc2", true),
                 fmt.Sprintf("upstream-target:api-failover-local.default.default.dc2:%s-failover-local?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-local", "", "dc2", true),
                 fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true),
-                upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "", "cluster-01", true),
+                upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "dc1", "cluster-01", true),
                 fmt.Sprintf("mesh-gateway:dc2:%s-failover-remote?dc=dc2", apiUID.String()): genVerifyGatewayWatch("dc2"),
                 fmt.Sprintf("mesh-gateway:dc1:%s-failover-local?dc=dc2", apiUID.String()): genVerifyGatewayWatch("dc1"),
             },
@@ -316,8 +316,19 @@ func (s *handlerUpstreams) resetWatchesFromChain(
             watchedChainEndpoints = true
         }
 
-        opts := targetWatchOpts{upstreamID: uid}
-        opts.fromChainTarget(target)
+        opts := targetWatchOpts{
+            upstreamID: uid,
+            chainID:    target.ID,
+            service:    target.Service,
+            filter:     target.Subset.Filter,
+            datacenter: target.Datacenter,
+            peer:       target.Peer,
+            entMeta:    target.GetEnterpriseMetadata(),
+        }
+        // Peering targets do not set the datacenter field, so we should default it here.
+        if opts.datacenter == "" {
+            opts.datacenter = s.source.Datacenter
+        }
 
         err := s.watchUpstreamTarget(ctx, snap, opts)
         if err != nil {
@@ -435,15 +446,6 @@ type targetWatchOpts struct {
     entMeta *acl.EnterpriseMeta
 }
 
-func (o *targetWatchOpts) fromChainTarget(t *structs.DiscoveryTarget) {
-    o.chainID = t.ID
-    o.service = t.Service
-    o.filter = t.Subset.Filter
-    o.datacenter = t.Datacenter
-    o.peer = t.Peer
-    o.entMeta = t.GetEnterpriseMetadata()
-}
-
 func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *ConfigSnapshotUpstreams, opts targetWatchOpts) error {
     s.logger.Trace("initializing watch of target",
         "upstream", opts.upstreamID,
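For the first issue, here is a similarly hedged, self-contained sketch (again with illustrative stand-in types rather than Consul's) of the defaulting rule that the resetWatchesFromChain hunk applies: peering targets leave the datacenter empty, and a health watch issued without a datacenter bypasses the agentless query optimizations, so the watch options fall back to the proxy's own source datacenter.

```go
package main

import "fmt"

// targetWatchOptions and proxySource are illustrative stand-ins, not Consul
// types; they only demonstrate the defaulting rule applied in the fix.
type targetWatchOptions struct {
	service    string
	datacenter string
	peer       string
}

type proxySource struct {
	Datacenter string
}

// optionsForTarget copies what the discovery target provides and falls back to
// the proxy's own datacenter when the target (e.g. a peering target) leaves the
// field empty, so the downstream health query always names a datacenter.
func optionsForTarget(service, targetDC, peer string, src proxySource) targetWatchOptions {
	opts := targetWatchOptions{
		service:    service,
		datacenter: targetDC,
		peer:       peer,
	}
	if opts.datacenter == "" {
		opts.datacenter = src.Datacenter
	}
	return opts
}

func main() {
	src := proxySource{Datacenter: "dc1"}
	fmt.Printf("%+v\n", optionsForTarget("api", "dc2", "", src))        // explicit datacenter preserved
	fmt.Printf("%+v\n", optionsForTarget("api", "", "cluster-01", src)) // peered target: defaults to dc1
}
```

An explicitly set datacenter (for example the dc2 failover targets) is left untouched, which is what the updated TestState_WatchesAndUpdates expectations above exercise: the peered failover request now carries "dc1" while the dc2 failover targets keep "dc2".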