From 4626b65124f7c36190c03166f0efb991046a41b8 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" <4903+rboyer@users.noreply.github.com> Date: Fri, 10 Jun 2022 16:11:40 -0500 Subject: [PATCH] xds: allow for peered upstreams to use tagged addresses that are hostnames (#13422) Mesh gateways can use hostnames in their tagged addresses (#7999). This is useful if you were to expose a mesh gateway using a cloud networking load balancer appliance that gives you a DNS name but no reliable static IPs. Envoy cannot accept hostnames via EDS and those must be configured using CDS. There was already logic when configuring gateways in other locations in the code, but given the illusions in play for peering the downstream of a peered service wasn't aware that it should be doing that. Also: - ensuring that we always try to use wan-like addresses to cross peer boundaries. --- agent/proxycfg/connect_proxy.go | 1 + agent/proxycfg/manager_test.go | 18 ++-- agent/proxycfg/snapshot.go | 6 +- agent/proxycfg/testing_peering.go | 10 +++ agent/proxycfg/upstreams.go | 13 ++- agent/xds/clusters.go | 87 ++++++++++++++----- agent/xds/endpoints.go | 8 +- ...-proxy-with-peered-upstreams.latest.golden | 33 +++++-- 8 files changed, 133 insertions(+), 43 deletions(-) diff --git a/agent/proxycfg/connect_proxy.go b/agent/proxycfg/connect_proxy.go index 4bbc00201..0f62767c6 100644 --- a/agent/proxycfg/connect_proxy.go +++ b/agent/proxycfg/connect_proxy.go @@ -32,6 +32,7 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e snap.ConnectProxy.PassthroughUpstreams = make(map[UpstreamID]map[string]map[string]struct{}) snap.ConnectProxy.PassthroughIndices = make(map[string]indexedTarget) snap.ConnectProxy.PeerUpstreamEndpoints = make(map[UpstreamID]structs.CheckServiceNodes) + snap.ConnectProxy.PeerUpstreamEndpointsUseHostnames = make(map[UpstreamID]struct{}) // Watch for root changes err := s.dataSources.CARoots.Notify(ctx, &structs.DCSpecificRequest{ diff --git a/agent/proxycfg/manager_test.go b/agent/proxycfg/manager_test.go index af28c5578..08a54b11b 100644 --- a/agent/proxycfg/manager_test.go +++ b/agent/proxycfg/manager_test.go @@ -236,10 +236,11 @@ func TestManager_BasicLifecycle(t *testing.T) { NewUpstreamID(&upstreams[1]): &upstreams[1], NewUpstreamID(&upstreams[2]): &upstreams[2], }, - PassthroughUpstreams: map[UpstreamID]map[string]map[string]struct{}{}, - PassthroughIndices: map[string]indexedTarget{}, - PeerTrustBundles: map[string]*pbpeering.PeeringTrustBundle{}, - PeerUpstreamEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + PassthroughUpstreams: map[UpstreamID]map[string]map[string]struct{}{}, + PassthroughIndices: map[string]indexedTarget{}, + PeerTrustBundles: map[string]*pbpeering.PeeringTrustBundle{}, + PeerUpstreamEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + PeerUpstreamEndpointsUseHostnames: map[UpstreamID]struct{}{}, }, PreparedQueryEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, WatchedServiceChecks: map[structs.ServiceID][]structs.CheckType{}, @@ -296,10 +297,11 @@ func TestManager_BasicLifecycle(t *testing.T) { NewUpstreamID(&upstreams[1]): &upstreams[1], NewUpstreamID(&upstreams[2]): &upstreams[2], }, - PassthroughUpstreams: map[UpstreamID]map[string]map[string]struct{}{}, - PassthroughIndices: map[string]indexedTarget{}, - PeerTrustBundles: map[string]*pbpeering.PeeringTrustBundle{}, - PeerUpstreamEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + PassthroughUpstreams: map[UpstreamID]map[string]map[string]struct{}{}, + PassthroughIndices: map[string]indexedTarget{}, + PeerTrustBundles: map[string]*pbpeering.PeeringTrustBundle{}, + PeerUpstreamEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + PeerUpstreamEndpointsUseHostnames: map[UpstreamID]struct{}{}, }, PreparedQueryEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, WatchedServiceChecks: map[structs.ServiceID][]structs.CheckType{}, diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 1bcf05f88..5f8ddd560 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -83,7 +83,8 @@ type ConfigSnapshotUpstreams struct { // PeerUpstreamEndpoints is a map of UpstreamID -> (set of IP addresses) // and used to determine the backing endpoints of an upstream in another // peer. - PeerUpstreamEndpoints map[UpstreamID]structs.CheckServiceNodes + PeerUpstreamEndpoints map[UpstreamID]structs.CheckServiceNodes + PeerUpstreamEndpointsUseHostnames map[UpstreamID]struct{} } // indexedTarget is used to associate the Raft modify index of a resource @@ -162,7 +163,8 @@ func (c *configSnapshotConnectProxy) isEmpty() bool { len(c.IntentionUpstreams) == 0 && !c.PeeringTrustBundlesSet && !c.MeshConfigSet && - len(c.PeerUpstreamEndpoints) == 0 + len(c.PeerUpstreamEndpoints) == 0 && + len(c.PeerUpstreamEndpointsUseHostnames) == 0 } type configSnapshotTerminatingGateway struct { diff --git a/agent/proxycfg/testing_peering.go b/agent/proxycfg/testing_peering.go index db113869b..9b1973c9a 100644 --- a/agent/proxycfg/testing_peering.go +++ b/agent/proxycfg/testing_peering.go @@ -51,6 +51,16 @@ func TestConfigSnapshotPeering(t testing.T) *ConfigSnapshot { Service: "payments-sidecar-proxy", Kind: structs.ServiceKindConnectProxy, Port: 443, + TaggedAddresses: map[string]structs.ServiceAddress{ + structs.TaggedAddressLAN: { + Address: "85.252.102.31", + Port: 443, + }, + structs.TaggedAddressWAN: { + Address: "123.us-east-1.elb.notaws.com", + Port: 8443, + }, + }, Connect: structs.ServiceConnect{ PeerMeta: &structs.PeeringServiceMeta{ SNI: []string{ diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go index 66f65a41e..1f1251c42 100644 --- a/agent/proxycfg/upstreams.go +++ b/agent/proxycfg/upstreams.go @@ -97,7 +97,18 @@ func (s *handlerUpstreams) handleUpdateUpstreams(ctx context.Context, u UpdateEv uid := UpstreamIDFromString(uidString) - upstreamsSnapshot.PeerUpstreamEndpoints[uid] = resp.Nodes + filteredNodes := hostnameEndpoints( + s.logger, + GatewayKey{ /*empty so it never matches*/ }, + resp.Nodes, + ) + if len(filteredNodes) > 0 { + upstreamsSnapshot.PeerUpstreamEndpoints[uid] = filteredNodes + upstreamsSnapshot.PeerUpstreamEndpointsUseHostnames[uid] = struct{}{} + } else { + upstreamsSnapshot.PeerUpstreamEndpoints[uid] = resp.Nodes + delete(upstreamsSnapshot.PeerUpstreamEndpointsUseHostnames, uid) + } if s.kind != structs.ServiceKindConnectProxy || s.proxyCfg.Mode != structs.ProxyModeTransparent { return nil diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index 7adaf7cf3..bff070850 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -13,10 +13,12 @@ import ( envoy_upstreams_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/upstreams/http/v3" envoy_matcher_v3 "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" envoy_type_v3 "github.com/envoyproxy/go-control-plane/envoy/type/v3" + "github.com/golang/protobuf/jsonpb" "github.com/golang/protobuf/proto" "github.com/golang/protobuf/ptypes/any" "github.com/golang/protobuf/ptypes/wrappers" + "github.com/hashicorp/go-hclog" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/durationpb" @@ -575,23 +577,14 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( s.Logger.Trace("generating cluster for", "cluster", clusterName) if c == nil { c = &envoy_cluster_v3.Cluster{ - Name: clusterName, - AltStatName: clusterName, - ConnectTimeout: durationpb.New(time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond), - ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}, + Name: clusterName, + AltStatName: clusterName, + ConnectTimeout: durationpb.New(time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond), CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{ HealthyPanicThreshold: &envoy_type_v3.Percent{ Value: 0, // disable panic threshold }, }, - EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{ - EdsConfig: &envoy_core_v3.ConfigSource{ - ResourceApiVersion: envoy_core_v3.ApiVersion_V3, - ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{ - Ads: &envoy_core_v3.AggregatedConfigSource{}, - }, - }, - }, CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{ Thresholds: makeThresholdsIfNeeded(cfg.Limits), }, @@ -602,6 +595,35 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( return c, err } } + + useEDS := true + if _, ok := cfgSnap.ConnectProxy.PeerUpstreamEndpointsUseHostnames[uid]; ok { + useEDS = false + } + + // If none of the service instances are addressed by a hostname we + // provide the endpoint IP addresses via EDS + if useEDS { + c.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS} + c.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{ + EdsConfig: &envoy_core_v3.ConfigSource{ + ResourceApiVersion: envoy_core_v3.ApiVersion_V3, + ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{ + Ads: &envoy_core_v3.AggregatedConfigSource{}, + }, + }, + } + } else { + configureClusterWithHostnames( + s.Logger, + c, + "", /*TODO:make configurable?*/ + cfgSnap.ConnectProxy.PeerUpstreamEndpoints[uid], + true, /*isRemote*/ + false, /*onlyPassing*/ + ) + } + } rootPEMs := cfgSnap.RootPEMs() @@ -1054,9 +1076,31 @@ func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, op }, }, } - return cluster + } else { + configureClusterWithHostnames( + s.Logger, + cluster, + cfg.DNSDiscoveryType, + opts.hostnameEndpoints, + opts.isRemote, + opts.onlyPassing, + ) } + return cluster +} + +func configureClusterWithHostnames( + logger hclog.Logger, + cluster *envoy_cluster_v3.Cluster, + dnsDiscoveryType string, + // hostnameEndpoints is a list of endpoints with a hostname as their address + hostnameEndpoints structs.CheckServiceNodes, + // isRemote determines whether the cluster is in a remote DC or partition and we should prefer a WAN address + isRemote bool, + // onlyPassing determines whether endpoints that do not have a passing status should be considered unhealthy + onlyPassing bool, +) { // When a service instance is addressed by a hostname we have Envoy do the DNS resolution // by setting a DNS cluster type and passing the hostname endpoints via CDS. rate := 10 * time.Second @@ -1064,7 +1108,7 @@ func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, op cluster.DnsLookupFamily = envoy_cluster_v3.Cluster_V4_ONLY discoveryType := envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_LOGICAL_DNS} - if cfg.DNSDiscoveryType == "strict_dns" { + if dnsDiscoveryType == "strict_dns" { discoveryType.Type = envoy_cluster_v3.Cluster_STRICT_DNS } cluster.ClusterDiscoveryType = &discoveryType @@ -1077,11 +1121,11 @@ func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, op idx int fallback *envoy_endpoint_v3.LbEndpoint ) - for i, e := range opts.hostnameEndpoints { - _, addr, port := e.BestAddress(opts.isRemote) + for i, e := range hostnameEndpoints { + _, addr, port := e.BestAddress(isRemote) uniqueHostnames[addr] = true - health, weight := calculateEndpointHealthAndWeight(e, opts.onlyPassing) + health, weight := calculateEndpointHealthAndWeight(e, onlyPassing) if health == envoy_core_v3.HealthStatus_UNHEALTHY { fallback = makeLbEndpoint(addr, port, health, weight) continue @@ -1096,18 +1140,18 @@ func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, op } } - dc := opts.hostnameEndpoints[idx].Node.Datacenter - service := opts.hostnameEndpoints[idx].Service.CompoundServiceName() + dc := hostnameEndpoints[idx].Node.Datacenter + service := hostnameEndpoints[idx].Service.CompoundServiceName() // Fall back to last unhealthy endpoint if none were healthy if len(endpoints) == 0 { - s.Logger.Warn("upstream service does not contain any healthy instances", + logger.Warn("upstream service does not contain any healthy instances", "dc", dc, "service", service.String()) endpoints = append(endpoints, fallback) } if len(uniqueHostnames) > 1 { - s.Logger.Warn(fmt.Sprintf("service contains instances with more than one unique hostname; only %q be resolved by Envoy", hostname), + logger.Warn(fmt.Sprintf("service contains instances with more than one unique hostname; only %q be resolved by Envoy", hostname), "dc", dc, "service", service.String()) } @@ -1119,7 +1163,6 @@ func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, op }, }, } - return cluster } func makeThresholdsIfNeeded(limits *structs.UpstreamLimits) []*envoy_cluster_v3.CircuitBreakers_Thresholds { diff --git a/agent/xds/endpoints.go b/agent/xds/endpoints.go index 0122feb4e..2a60c05ec 100644 --- a/agent/xds/endpoints.go +++ b/agent/xds/endpoints.go @@ -96,6 +96,12 @@ func (s *ResourceGenerator) endpointsFromSnapshotConnectProxy(cfgSnap *proxycfg. clusterName = uid.EnvoyID() } + // Also skip peer instances with a hostname as their address. EDS + // cannot resolve hostnames, so we provide them through CDS instead. + if _, ok := cfgSnap.ConnectProxy.PeerUpstreamEndpointsUseHostnames[uid]; ok { + continue + } + endpoints, ok := cfgSnap.ConnectProxy.PeerUpstreamEndpoints[uid] if ok { la := makeLoadAssignment( @@ -103,7 +109,7 @@ func (s *ResourceGenerator) endpointsFromSnapshotConnectProxy(cfgSnap *proxycfg. []loadAssignmentEndpointGroup{ {Endpoints: endpoints}, }, - cfgSnap.Locality, + proxycfg.GatewayKey{ /*empty so it never matches*/ }, ) resources = append(resources, la) } diff --git a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden index c2606f659..c6594a230 100644 --- a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden +++ b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden @@ -30,19 +30,34 @@ "@type": "type.googleapis.com/envoy.config.cluster.v3.Cluster", "name": "payments.default.default.cloud.external.1c053652-8512-4373-90cf-5a7f6263a994.consul", "altStatName": "payments.default.default.cloud.external.1c053652-8512-4373-90cf-5a7f6263a994.consul", - "type": "EDS", - "edsClusterConfig": { - "edsConfig": { - "ads": { - - }, - "resourceApiVersion": "V3" - } - }, + "type": "LOGICAL_DNS", "connectTimeout": "5s", + "loadAssignment": { + "clusterName": "payments.default.default.cloud.external.1c053652-8512-4373-90cf-5a7f6263a994.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "123.us-east-1.elb.notaws.com", + "portValue": 8443 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, "circuitBreakers": { }, + "dnsRefreshRate": "10s", + "dnsLookupFamily": "V4_ONLY", "outlierDetection": { },