Fix issue with incorrect proxycfg watch on upstream peer-targets. (#15865)

This fixes an issue where the incorrect partition was given to the
upstream target watch, which meant that failover logic would not
work correctly.
This commit is contained in:
Derek Menteer 2023-01-03 10:44:08 -06:00 committed by GitHub
parent 76cc876ac3
commit 2af14c0084
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 33 deletions

3
.changelog/15865.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
connect: Fix issue where watches on upstream failover peer targets did not always query the correct data.
```

View File

@ -873,10 +873,10 @@ func (s *ConfigSnapshot) ToConfigSnapshotUpstreams() (*ConfigSnapshotUpstreams,
}
}
func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) structs.PeeringServiceMeta {
func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) (structs.PeeringServiceMeta, bool) {
nodes, _ := u.PeerUpstreamEndpoints.Get(uid)
if len(nodes) == 0 {
return structs.PeeringServiceMeta{}
return structs.PeeringServiceMeta{}, false
}
// In agent/rpc/peering/subscription_manager.go we denormalize the
@ -892,9 +892,9 @@ func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) structs.Peeri
// catalog to avoid this weird construction.
csn := nodes[0]
if csn.Service == nil {
return structs.PeeringServiceMeta{}
return structs.PeeringServiceMeta{}, false
}
return *csn.Service.Connect.PeerMeta
return *csn.Service.Connect.PeerMeta, true
}
// PeeredUpstreamIDs returns a slice of peered UpstreamIDs from explicit config entries

View File

@ -98,7 +98,6 @@ func (s *handlerUpstreams) handleUpdateUpstreams(ctx context.Context, u UpdateEv
return fmt.Errorf("invalid type for response: %T", u.Result)
}
uidString := strings.TrimPrefix(u.CorrelationID, upstreamPeerWatchIDPrefix)
uid := UpstreamIDFromString(uidString)
s.setPeerEndpoints(upstreamsSnapshot, uid, resp.Nodes)
@ -306,15 +305,9 @@ func (s *handlerUpstreams) resetWatchesFromChain(
watchedChainEndpoints = true
}
opts := targetWatchOpts{
upstreamID: uid,
chainID: target.ID,
service: target.Service,
filter: target.Subset.Filter,
datacenter: target.Datacenter,
peer: target.Peer,
entMeta: target.GetEnterpriseMetadata(),
}
opts := targetWatchOpts{upstreamID: uid}
opts.fromChainTarget(chain, target)
err := s.watchUpstreamTarget(ctx, snap, opts)
if err != nil {
return fmt.Errorf("failed to watch target %q for upstream %q", target.ID, uid)
@ -431,6 +424,32 @@ type targetWatchOpts struct {
entMeta *acl.EnterpriseMeta
}
func (o *targetWatchOpts) fromChainTarget(c *structs.CompiledDiscoveryChain, t *structs.DiscoveryTarget) {
o.chainID = t.ID
o.service = t.Service
o.filter = t.Subset.Filter
o.datacenter = t.Datacenter
o.peer = t.Peer
o.entMeta = t.GetEnterpriseMetadata()
// The peer-targets in a discovery chain intentionally clear out
// the partition field, since we don't know the remote service's partition.
// Therefore, we must query with the chain's local partition / DC, or else
// the services will not be found.
//
// Note that the namespace is not swapped out, because it should
// always match the value in the remote datacenter (and shouldn't
// have been changed anywhere).
if o.peer != "" {
o.datacenter = ""
// Clone the enterprise meta so it's not modified when we swap the partition.
var em acl.EnterpriseMeta
em.Merge(o.entMeta)
em.OverridePartition(c.Partition)
o.entMeta = &em
}
}
func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *ConfigSnapshotUpstreams, opts targetWatchOpts) error {
s.logger.Trace("initializing watch of target",
"upstream", opts.upstreamID,
@ -438,9 +457,6 @@ func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *Config
"target", opts.chainID,
)
var finalMeta acl.EnterpriseMeta
finalMeta.Merge(opts.entMeta)
uid := opts.upstreamID
correlationID := "upstream-target:" + opts.chainID + ":" + uid.String()
@ -449,6 +465,10 @@ func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *Config
correlationID = upstreamPeerWatchIDPrefix + uid.String()
}
// Perform this merge so that a nil EntMeta isn't possible.
var entMeta acl.EnterpriseMeta
entMeta.Merge(opts.entMeta)
ctx, cancel := context.WithCancel(ctx)
err := s.dataSources.Health.Notify(ctx, &structs.ServiceSpecificRequest{
PeerName: opts.peer,
@ -463,7 +483,7 @@ func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *Config
// the default and makes metrics and other things much cleaner. It's
// simpler for us if we have the type to make things unambiguous.
Source: *s.source,
EnterpriseMeta: finalMeta,
EnterpriseMeta: entMeta,
}, correlationID, s.ch)
if err != nil {

View File

@ -3,7 +3,6 @@ package xds
import (
"errors"
"fmt"
"sort"
"strings"
"time"
@ -129,7 +128,10 @@ func (s *ResourceGenerator) clustersFromSnapshotConnectProxy(cfgSnap *proxycfg.C
continue
}
peerMeta := cfgSnap.ConnectProxy.UpstreamPeerMeta(uid)
peerMeta, found := cfgSnap.ConnectProxy.UpstreamPeerMeta(uid)
if !found {
s.Logger.Warn("failed to fetch upstream peering metadata for cluster", "uid", uid)
}
cfg := s.getAndModifyUpstreamConfigForPeeredListener(uid, upstream, peerMeta)
upstreamCluster, err := s.makeUpstreamClusterForPeerService(uid, cfg, peerMeta, cfgSnap)
@ -1296,18 +1298,13 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
for _, targetData := range targetClustersData {
target := chain.Targets[targetData.targetID]
sni := target.SNI
var additionalSpiffeIDs []string
targetSpiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: target.Namespace,
Partition: target.Partition,
Datacenter: target.Datacenter,
Service: target.Service,
}.URI().String()
targetUID := proxycfg.NewUpstreamIDFromTargetID(targetData.targetID)
if targetUID.Peer != "" {
peerMeta := upstreamsSnapshot.UpstreamPeerMeta(targetUID)
peerMeta, found := upstreamsSnapshot.UpstreamPeerMeta(targetUID)
if !found {
s.Logger.Warn("failed to fetch upstream peering metadata for cluster", "target", targetUID)
}
upstreamCluster, err := s.makeUpstreamClusterForPeerService(targetUID, upstreamConfig, peerMeta, cfgSnap)
if err != nil {
continue
@ -1318,6 +1315,14 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
continue
}
targetSpiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: target.Namespace,
Partition: target.Partition,
Datacenter: target.Datacenter,
Service: target.Service,
}.URI().String()
s.Logger.Debug("generating cluster for", "cluster", targetData.clusterName)
c := &envoy_cluster_v3.Cluster{
Name: targetData.clusterName,
@ -1371,9 +1376,7 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
makeTLSParametersFromProxyTLSConfig(cfgSnap.MeshConfigTLSOutgoing()),
)
spiffeIDs := append([]string{targetSpiffeID}, additionalSpiffeIDs...)
sort.Strings(spiffeIDs)
err = injectSANMatcher(commonTLSContext, spiffeIDs...)
err = injectSANMatcher(commonTLSContext, targetSpiffeID)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}

View File

@ -363,7 +363,10 @@ func (s *ResourceGenerator) listenersFromSnapshotConnectProxy(cfgSnap *proxycfg.
continue
}
peerMeta := cfgSnap.ConnectProxy.UpstreamPeerMeta(uid)
peerMeta, found := cfgSnap.ConnectProxy.UpstreamPeerMeta(uid)
if !found {
s.Logger.Warn("failed to fetch upstream peering metadata for listener", "uid", uid)
}
cfg := s.getAndModifyUpstreamConfigForPeeredListener(uid, upstreamCfg, peerMeta)
// If escape hatch is present, create a listener from it and move on to the next