open-consul/agent/xds/clusters.go

1898 lines
63 KiB
Go
Raw Normal View History

package xds
import (
"errors"
"fmt"
"sort"
"strings"
"time"
envoy_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
envoy_core_v3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
envoy_endpoint_v3 "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
envoy_aggregate_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3"
envoy_tls_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3"
envoy_upstreams_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/upstreams/http/v3"
envoy_matcher_v3 "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3"
envoy_type_v3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/golang/protobuf/jsonpb"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes/any"
"github.com/golang/protobuf/ptypes/wrappers"
"github.com/hashicorp/go-hclog"
"google.golang.org/protobuf/types/known/anypb"
"google.golang.org/protobuf/types/known/durationpb"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/proxycfg"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/proto/pbpeering"
)
const (
meshGatewayExportedClusterNamePrefix = "exported~"
failoverClusterNamePrefix = "failover-target~"
)
// clustersFromSnapshot returns the xDS API representation of the "clusters" in the snapshot.
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) clustersFromSnapshot(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
if cfgSnap == nil {
return nil, errors.New("nil config given")
}
switch cfgSnap.Kind {
case structs.ServiceKindConnectProxy:
2020-03-27 21:57:16 +00:00
return s.clustersFromSnapshotConnectProxy(cfgSnap)
case structs.ServiceKindTerminatingGateway:
res, err := s.clustersFromSnapshotTerminatingGateway(cfgSnap)
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
if err != nil {
return nil, err
}
2021-09-22 18:48:50 +00:00
return res, nil
case structs.ServiceKindMeshGateway:
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
res, err := s.clustersFromSnapshotMeshGateway(cfgSnap)
if err != nil {
return nil, err
}
2021-09-22 18:48:50 +00:00
return res, nil
case structs.ServiceKindIngressGateway:
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
res, err := s.clustersFromSnapshotIngressGateway(cfgSnap)
if err != nil {
return nil, err
}
2021-09-22 18:48:50 +00:00
return res, nil
default:
return nil, fmt.Errorf("Invalid service kind: %v", cfgSnap.Kind)
}
}
// clustersFromSnapshot returns the xDS API representation of the "clusters"
// (upstreams) in the snapshot.
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) clustersFromSnapshotConnectProxy(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
2021-03-17 19:40:49 +00:00
// This sizing is a lower bound.
clusters := make([]proto.Message, 0, len(cfgSnap.ConnectProxy.DiscoveryChain)+1)
// Include the "app" cluster for the public listener
appCluster, err := s.makeAppCluster(cfgSnap, LocalAppClusterName, "", cfgSnap.Proxy.LocalServicePort)
if err != nil {
return nil, err
}
clusters = append(clusters, appCluster)
if cfgSnap.Proxy.Mode == structs.ProxyModeTransparent {
passthroughs, err := makePassthroughClusters(cfgSnap)
if err != nil {
return nil, fmt.Errorf("failed to make passthrough clusters for transparent proxy: %v", err)
}
clusters = append(clusters, passthroughs...)
2021-03-17 19:40:49 +00:00
}
getUpstream := func(uid proxycfg.UpstreamID) (*structs.Upstream, bool) {
upstream := cfgSnap.ConnectProxy.UpstreamConfig[uid]
explicit := upstream.HasLocalPortOrSocket()
implicit := cfgSnap.ConnectProxy.IsImplicitUpstream(uid)
return upstream, !implicit && !explicit
}
// NOTE: Any time we skip a chain below we MUST also skip that discovery chain in endpoints.go
// so that the sets of endpoints generated matches the sets of clusters.
for uid, chain := range cfgSnap.ConnectProxy.DiscoveryChain {
upstream, skip := getUpstream(uid)
if skip {
continue
2021-03-17 19:40:49 +00:00
}
upstreamClusters, err := s.makeUpstreamClustersForDiscoveryChain(
uid,
upstream,
chain,
cfgSnap,
false,
)
2021-03-17 19:40:49 +00:00
if err != nil {
return nil, err
}
2021-03-17 19:40:49 +00:00
for _, cluster := range upstreamClusters {
clusters = append(clusters, cluster)
}
}
// NOTE: Any time we skip an upstream below we MUST also skip that same
// upstream in endpoints.go so that the sets of endpoints generated matches
// the sets of clusters.
for _, uid := range cfgSnap.ConnectProxy.PeeredUpstreamIDs() {
upstream, skip := getUpstream(uid)
if skip {
continue
}
peerMeta := cfgSnap.ConnectProxy.UpstreamPeerMeta(uid)
cfg := s.getAndModifyUpstreamConfigForPeeredListener(uid, upstream, peerMeta)
upstreamCluster, err := s.makeUpstreamClusterForPeerService(uid, cfg, peerMeta, cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, upstreamCluster)
}
2021-03-17 19:40:49 +00:00
for _, u := range cfgSnap.Proxy.Upstreams {
if u.DestinationType != structs.UpstreamDestTypePreparedQuery {
continue
}
2021-03-17 19:40:49 +00:00
upstreamCluster, err := s.makeUpstreamClusterForPreparedQuery(u, cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, upstreamCluster)
}
cfgSnap.Proxy.Expose.Finalize()
paths := cfgSnap.Proxy.Expose.Paths
// Add service health checks to the list of paths to create clusters for if needed
if cfgSnap.Proxy.Expose.Checks {
psid := structs.NewServiceID(cfgSnap.Proxy.DestinationServiceID, &cfgSnap.ProxyID.EnterpriseMeta)
for _, check := range cfgSnap.ConnectProxy.WatchedServiceChecks[psid] {
p, err := parseCheckPath(check)
if err != nil {
s.Logger.Warn("failed to create cluster for", "check", check.CheckID, "error", err)
continue
}
paths = append(paths, p)
}
}
// Create a new cluster if we need to expose a port that is different from the service port
for _, path := range paths {
if path.LocalPathPort == cfgSnap.Proxy.LocalServicePort {
continue
}
c, err := s.makeAppCluster(cfgSnap, makeExposeClusterName(path.LocalPathPort), path.Protocol, path.LocalPathPort)
if err != nil {
s.Logger.Warn("failed to make local cluster", "path", path.Path, "error", err)
continue
}
clusters = append(clusters, c)
}
return clusters, nil
}
func makeExposeClusterName(destinationPort int) string {
return fmt.Sprintf("exposed_cluster_%d", destinationPort)
}
// In transparent proxy mode there are potentially multiple passthrough clusters added.
// The first is for destinations outside of Consul's catalog. This is for a plain TCP proxy.
// All of these use Envoy's ORIGINAL_DST listener filter, which forwards to the original
// destination address (before the iptables redirection).
// The rest are for destinations inside the mesh, which require certificates for mTLS.
func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
// This size is an upper bound.
clusters := make([]proto.Message, 0, len(cfgSnap.ConnectProxy.PassthroughUpstreams)+1)
if meshConf := cfgSnap.MeshConfig(); meshConf == nil ||
!meshConf.TransparentProxy.MeshDestinationsOnly {
clusters = append(clusters, &envoy_cluster_v3.Cluster{
Name: OriginalDestinationClusterName,
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{
Type: envoy_cluster_v3.Cluster_ORIGINAL_DST,
},
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
ConnectTimeout: durationpb.New(5 * time.Second),
})
}
for uid, chain := range cfgSnap.ConnectProxy.DiscoveryChain {
targetMap, ok := cfgSnap.ConnectProxy.PassthroughUpstreams[uid]
if !ok {
continue
}
for targetID := range targetMap {
uid := proxycfg.NewUpstreamIDFromTargetID(targetID)
sni := connect.ServiceSNI(
uid.Name, "", uid.NamespaceOrDefault(), uid.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
// Prefixed with passthrough to distinguish from non-passthrough clusters for the same upstream.
name := "passthrough~" + sni
c := envoy_cluster_v3.Cluster{
Name: name,
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{
Type: envoy_cluster_v3.Cluster_ORIGINAL_DST,
},
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
ConnectTimeout: durationpb.New(5 * time.Second),
}
if discoTarget, ok := chain.Targets[targetID]; ok && discoTarget.ConnectTimeout > 0 {
c.ConnectTimeout = durationpb.New(discoTarget.ConnectTimeout)
}
transportSocket, err := makeMTLSTransportSocket(cfgSnap, uid, sni)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
clusters = append(clusters, &c)
}
}
err := cfgSnap.ConnectProxy.DestinationsUpstream.ForEachKeyE(func(uid proxycfg.UpstreamID) error {
svcConfig, ok := cfgSnap.ConnectProxy.DestinationsUpstream.Get(uid)
if !ok || svcConfig.Destination == nil {
return nil
}
// One Cluster per Destination Address
for _, address := range svcConfig.Destination.Addresses {
name := clusterNameForDestination(cfgSnap, uid.Name, address, uid.NamespaceOrDefault(), uid.PartitionOrDefault())
c := envoy_cluster_v3.Cluster{
Name: name,
AltStatName: name,
ConnectTimeout: durationpb.New(5 * time.Second),
CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{
HealthyPanicThreshold: &envoy_type_v3.Percent{
Value: 0, // disable panic threshold
},
},
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
},
// Endpoints are managed separately by EDS
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoy_cluster_v3.OutlierDetection{},
}
// Use the cluster name as the SNI to match on in the terminating gateway
transportSocket, err := makeMTLSTransportSocket(cfgSnap, uid, name)
if err != nil {
return err
}
c.TransportSocket = transportSocket
clusters = append(clusters, &c)
}
return nil
})
if err != nil {
return nil, err
}
return clusters, nil
}
func makeMTLSTransportSocket(cfgSnap *proxycfg.ConfigSnapshot, uid proxycfg.UpstreamID, sni string) (*envoy_core_v3.TransportSocket, error) {
spiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Partition: uid.PartitionOrDefault(),
Namespace: uid.NamespaceOrDefault(),
Datacenter: cfgSnap.Datacenter,
Service: uid.Name,
}
commonTLSContext := makeCommonTLSContext(
cfgSnap.Leaf(),
cfgSnap.RootPEMs(),
makeTLSParametersFromProxyTLSConfig(cfgSnap.MeshConfigTLSOutgoing()),
)
err := injectSANMatcher(commonTLSContext, spiffeID.URI().String())
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(&tlsContext)
if err != nil {
return nil, err
}
return transportSocket, nil
}
func clusterNameForDestination(cfgSnap *proxycfg.ConfigSnapshot, name string, address string, namespace string, partition string) string {
name = destinationSpecificServiceName(name, address)
sni := connect.ServiceSNI(name, "", namespace, partition, cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
// Prefixed with destination to distinguish from non-passthrough clusters for the same upstream.
return "destination." + sni
}
func destinationSpecificServiceName(name string, address string) string {
address = strings.ReplaceAll(address, ":", "-")
address = strings.ReplaceAll(address, ".", "-")
return fmt.Sprintf("%s.%s", address, name)
}
// clustersFromSnapshotMeshGateway returns the xDS API representation of the "clusters"
// for a mesh gateway. This will include 1 cluster per remote datacenter as well as
// 1 cluster for each service subset.
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) clustersFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
2021-10-26 21:58:23 +00:00
keys := cfgSnap.MeshGateway.GatewayKeys()
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// Allocation count (this is a lower bound - all subset specific clusters will be appended):
// 1 cluster per remote dc/partition
// 1 cluster per local service
// 1 cluster per unique peer server (control plane traffic)
clusters := make([]proto.Message, 0, len(keys)+len(cfgSnap.MeshGateway.ServiceGroups)+len(cfgSnap.MeshGateway.PeerServers))
2021-10-27 18:36:44 +00:00
// Generate the remote clusters
for _, key := range keys {
2021-10-29 00:41:58 +00:00
if key.Matches(cfgSnap.Datacenter, cfgSnap.ProxyID.PartitionOrDefault()) {
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
continue // skip local
}
opts := clusterOpts{
name: connect.GatewaySNI(key.Datacenter, key.Partition, cfgSnap.Roots.TrustDomain),
hostnameEndpoints: cfgSnap.MeshGateway.HostnameDatacenters[key.String()],
2021-10-29 00:41:58 +00:00
isRemote: true,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
clusters = append(clusters, cluster)
}
2021-10-26 22:10:30 +00:00
if cfgSnap.ProxyID.InDefaultPartition() &&
2021-10-26 21:58:23 +00:00
cfgSnap.ServiceMeta[structs.MetaWANFederationKey] == "1" &&
cfgSnap.ServerSNIFn != nil {
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// Add all of the remote wildcard datacenter mappings for servers.
for _, key := range keys {
hostnameEndpoints := cfgSnap.MeshGateway.HostnameDatacenters[key.String()]
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// If the DC is our current DC then this cluster is for traffic from a remote DC to a local server.
// HostnameDatacenters is populated with gateway addresses, so it does not apply here.
if key.Datacenter == cfgSnap.Datacenter {
hostnameEndpoints = nil
}
opts := clusterOpts{
name: cfgSnap.ServerSNIFn(key.Datacenter, ""),
hostnameEndpoints: hostnameEndpoints,
2021-10-29 00:41:58 +00:00
isRemote: !key.Matches(cfgSnap.Datacenter, cfgSnap.ProxyID.PartitionOrDefault()),
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
clusters = append(clusters, cluster)
}
// And for the current datacenter, send all flavors appropriately.
servers, _ := cfgSnap.MeshGateway.WatchedLocalServers.Get(structs.ConsulServiceName)
for _, srv := range servers {
opts := clusterOpts{
name: cfgSnap.ServerSNIFn(cfgSnap.Datacenter, srv.Node.Node),
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
clusters = append(clusters, cluster)
}
}
// Create a single cluster for local servers to be dialed by peers.
// When peering through gateways we load balance across the local servers. They cannot be addressed individually.
if cfg := cfgSnap.MeshConfig(); cfg.PeerThroughMeshGateways() {
servers, _ := cfgSnap.MeshGateway.WatchedLocalServers.Get(structs.ConsulServiceName)
// Peering control-plane traffic can only ever be handled by the local leader.
// We avoid routing to read replicas since they will never be Raft voters.
if haveVoters(servers) {
cluster := s.makeGatewayCluster(cfgSnap, clusterOpts{
name: connect.PeeringServerSAN(cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain),
})
clusters = append(clusters, cluster)
}
}
2020-04-14 14:59:23 +00:00
// generate the per-service/subset clusters
2020-09-11 16:49:26 +00:00
c, err := s.makeGatewayServiceClusters(cfgSnap, cfgSnap.MeshGateway.ServiceGroups, cfgSnap.MeshGateway.ServiceResolvers)
2020-04-14 14:59:23 +00:00
if err != nil {
return nil, err
}
clusters = append(clusters, c...)
// generate the outgoing clusters for imported peer services.
c, err = s.makeGatewayOutgoingClusterPeeringServiceClusters(cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, c...)
// Generate per-target clusters for all exported discovery chains.
c, err = s.makeExportedUpstreamClustersForMeshGateway(cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, c...)
// Generate one cluster for each unique peer server for control plane traffic
c, err = s.makePeerServerClusters(cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, c...)
2020-04-14 14:59:23 +00:00
return clusters, nil
}
func haveVoters(servers structs.CheckServiceNodes) bool {
for _, srv := range servers {
if isReplica := srv.Service.Meta["read_replica"]; isReplica == "true" {
continue
}
return true
}
return false
}
func (s *ResourceGenerator) makePeerServerClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
if cfgSnap.Kind != structs.ServiceKindMeshGateway {
return nil, fmt.Errorf("unsupported gateway kind %q", cfgSnap.Kind)
}
clusters := make([]proto.Message, 0, len(cfgSnap.MeshGateway.PeerServers))
// Peer server names are assumed to already be formatted in SNI notation:
// server.<datacenter>.peering.<trust-domain>
for name, servers := range cfgSnap.MeshGateway.PeerServers {
if len(servers.Addresses) == 0 {
continue
}
var cluster *envoy_cluster_v3.Cluster
if servers.UseCDS {
cluster = s.makeExternalHostnameCluster(cfgSnap, clusterOpts{
name: name,
addresses: servers.Addresses,
})
} else {
cluster = s.makeGatewayCluster(cfgSnap, clusterOpts{
name: name,
})
}
clusters = append(clusters, cluster)
}
return clusters, nil
}
// clustersFromSnapshotTerminatingGateway returns the xDS API representation of the "clusters"
// for a terminating gateway. This will include 1 cluster per Destination associated with this terminating gateway.
func (s *ResourceGenerator) clustersFromSnapshotTerminatingGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
res := []proto.Message{}
gwClusters, err := s.makeGatewayServiceClusters(cfgSnap, cfgSnap.TerminatingGateway.ServiceGroups, cfgSnap.TerminatingGateway.ServiceResolvers)
if err != nil {
return nil, err
}
res = append(res, gwClusters...)
destClusters, err := s.makeDestinationClusters(cfgSnap)
if err != nil {
return nil, err
}
res = append(res, destClusters...)
return res, nil
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) makeGatewayServiceClusters(
2020-09-11 16:49:26 +00:00
cfgSnap *proxycfg.ConfigSnapshot,
services map[structs.ServiceName]structs.CheckServiceNodes,
resolvers map[structs.ServiceName]*structs.ServiceResolverConfigEntry,
) ([]proto.Message, error) {
var hostnameEndpoints structs.CheckServiceNodes
switch cfgSnap.Kind {
2020-09-11 16:49:26 +00:00
case structs.ServiceKindTerminatingGateway, structs.ServiceKindMeshGateway:
default:
return nil, fmt.Errorf("unsupported gateway kind %q", cfgSnap.Kind)
}
2020-04-14 14:59:23 +00:00
clusters := make([]proto.Message, 0, len(services))
for svc := range services {
clusterName := connect.ServiceSNI(svc.Name, "", svc.NamespaceOrDefault(), svc.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
2020-04-14 14:59:23 +00:00
resolver, hasResolver := resolvers[svc]
2020-09-11 15:21:43 +00:00
var loadBalancer *structs.LoadBalancer
2020-08-28 20:27:40 +00:00
if !hasResolver {
// Use a zero value resolver with no timeout and no subsets
resolver = &structs.ServiceResolverConfigEntry{}
}
if resolver.LoadBalancer != nil {
2020-09-11 15:21:43 +00:00
loadBalancer = resolver.LoadBalancer
}
// When making service clusters we only pass endpoints with hostnames if the kind is a terminating gateway
// This is because the services a mesh gateway will route to are not external services and are not addressed by a hostname.
if cfgSnap.Kind == structs.ServiceKindTerminatingGateway {
hostnameEndpoints = cfgSnap.TerminatingGateway.HostnameServices[svc]
}
2021-10-29 00:41:58 +00:00
var isRemote bool
if len(services[svc]) > 0 {
isRemote = !cfgSnap.Locality.Matches(services[svc][0].Node.Datacenter, services[svc][0].Node.PartitionOrDefault())
2021-10-29 00:41:58 +00:00
}
opts := clusterOpts{
name: clusterName,
hostnameEndpoints: hostnameEndpoints,
connectTimeout: resolver.ConnectTimeout,
2021-10-29 00:41:58 +00:00
isRemote: isRemote,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
2020-09-11 16:49:26 +00:00
if err := s.injectGatewayServiceAddons(cfgSnap, cluster, svc, loadBalancer); err != nil {
return nil, err
}
clusters = append(clusters, cluster)
svcConfig, ok := cfgSnap.TerminatingGateway.ServiceConfigs[svc]
isHTTP2 := false
if ok {
upstreamCfg, err := structs.ParseUpstreamConfig(svcConfig.ProxyConfig)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse", "upstream", svc, "error", err)
}
isHTTP2 = upstreamCfg.Protocol == "http2" || upstreamCfg.Protocol == "grpc"
}
if isHTTP2 {
if err := s.setHttp2ProtocolOptions(cluster); err != nil {
return nil, err
}
}
// If there is a service-resolver for this service then also setup a cluster for each subset
for name, subset := range resolver.Subsets {
subsetHostnameEndpoints, err := s.filterSubsetEndpoints(&subset, hostnameEndpoints)
if err != nil {
return nil, err
}
opts := clusterOpts{
name: connect.ServiceSNI(svc.Name, name, svc.NamespaceOrDefault(), svc.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain),
hostnameEndpoints: subsetHostnameEndpoints,
onlyPassing: subset.OnlyPassing,
connectTimeout: resolver.ConnectTimeout,
2021-10-29 00:41:58 +00:00
isRemote: isRemote,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
2020-09-11 16:49:26 +00:00
if err := s.injectGatewayServiceAddons(cfgSnap, cluster, svc, loadBalancer); err != nil {
return nil, err
}
if isHTTP2 {
if err := s.setHttp2ProtocolOptions(cluster); err != nil {
return nil, err
}
}
clusters = append(clusters, cluster)
}
}
return clusters, nil
}
func (s *ResourceGenerator) makeGatewayOutgoingClusterPeeringServiceClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
if cfgSnap.Kind != structs.ServiceKindMeshGateway {
return nil, fmt.Errorf("unsupported gateway kind %q", cfgSnap.Kind)
}
var clusters []proto.Message
for _, serviceGroups := range cfgSnap.MeshGateway.PeeringServices {
for sn, serviceGroup := range serviceGroups {
if len(serviceGroup.Nodes) == 0 {
continue
}
node := serviceGroup.Nodes[0]
if node.Service == nil {
return nil, fmt.Errorf("couldn't get SNI for peered service %s", sn.String())
}
// This uses the SNI in the accepting cluster peer so the remote mesh
// gateway can distinguish between an exported service as opposed to the
// usual mesh gateway route for a service.
clusterName := node.Service.Connect.PeerMeta.PrimarySNI()
opts := clusterOpts{
name: clusterName,
isRemote: true,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
if serviceGroup.UseCDS {
configureClusterWithHostnames(
s.Logger,
cluster,
"", /*TODO:make configurable?*/
serviceGroup.Nodes,
true, /*isRemote*/
false, /*onlyPassing*/
)
} else {
cluster.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}
cluster.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
}
}
clusters = append(clusters, cluster)
}
}
return clusters, nil
}
func (s *ResourceGenerator) makeDestinationClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
serviceConfigs := cfgSnap.TerminatingGateway.ServiceConfigs
clusters := make([]proto.Message, 0, len(cfgSnap.TerminatingGateway.DestinationServices))
for _, svcName := range cfgSnap.TerminatingGateway.ValidDestinations() {
svcConfig, _ := serviceConfigs[svcName]
dest := svcConfig.Destination
for _, address := range dest.Addresses {
opts := clusterOpts{
name: clusterNameForDestination(cfgSnap, svcName.Name, address, svcName.NamespaceOrDefault(), svcName.PartitionOrDefault()),
addresses: []structs.ServiceAddress{
{
Address: address,
Port: dest.Port,
},
},
}
var cluster *envoy_cluster_v3.Cluster
if structs.IsIP(address) {
cluster = s.makeExternalIPCluster(cfgSnap, opts)
} else {
cluster = s.makeExternalHostnameCluster(cfgSnap, opts)
}
if err := s.injectGatewayDestinationAddons(cfgSnap, cluster, svcName); err != nil {
return nil, err
}
clusters = append(clusters, cluster)
}
}
return clusters, nil
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) injectGatewayServiceAddons(cfgSnap *proxycfg.ConfigSnapshot, c *envoy_cluster_v3.Cluster, svc structs.ServiceName, lb *structs.LoadBalancer) error {
2020-09-11 16:49:26 +00:00
switch cfgSnap.Kind {
case structs.ServiceKindMeshGateway:
// We can't apply hash based LB config to mesh gateways because they rely on inspecting HTTP attributes
// and mesh gateways do not decrypt traffic
if !lb.IsHashBased() {
if err := injectLBToCluster(lb, c); err != nil {
return fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", c.Name, err)
}
}
case structs.ServiceKindTerminatingGateway:
// Context used for TLS origination to the cluster
if mapping, ok := cfgSnap.TerminatingGateway.GatewayServices[svc]; ok && mapping.CAFile != "" {
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
2020-09-11 16:49:26 +00:00
CommonTlsContext: makeCommonTLSContextFromFiles(mapping.CAFile, mapping.CertFile, mapping.KeyFile),
}
if mapping.SNI != "" {
tlsContext.Sni = mapping.SNI
if err := injectSANMatcher(tlsContext.CommonTlsContext, mapping.SNI); err != nil {
return fmt.Errorf("failed to inject SNI matcher into TLS context: %v", err)
}
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return err
2020-09-11 16:49:26 +00:00
}
c.TransportSocket = transportSocket
2020-09-11 16:49:26 +00:00
}
if err := injectLBToCluster(lb, c); err != nil {
return fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", c.Name, err)
}
}
return nil
}
func (s *ResourceGenerator) injectGatewayDestinationAddons(cfgSnap *proxycfg.ConfigSnapshot, c *envoy_cluster_v3.Cluster, svc structs.ServiceName) error {
switch cfgSnap.Kind {
case structs.ServiceKindTerminatingGateway:
// Context used for TLS origination to the cluster
if mapping, ok := cfgSnap.TerminatingGateway.DestinationServices[svc]; ok && mapping.CAFile != "" {
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: makeCommonTLSContextFromFiles(mapping.CAFile, mapping.CertFile, mapping.KeyFile),
}
if mapping.SNI != "" {
tlsContext.Sni = mapping.SNI
if err := injectSANMatcher(tlsContext.CommonTlsContext, mapping.SNI); err != nil {
return fmt.Errorf("failed to inject SNI matcher into TLS context: %v", err)
}
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return err
}
c.TransportSocket = transportSocket
}
}
return nil
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) clustersFromSnapshotIngressGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
var clusters []proto.Message
createdClusters := make(map[proxycfg.UpstreamID]bool)
for listenerKey, upstreams := range cfgSnap.IngressGateway.Upstreams {
for _, u := range upstreams {
uid := proxycfg.NewUpstreamID(&u)
// If we've already created a cluster for this upstream, skip it. Multiple listeners may
// reference the same upstream, so we don't need to create duplicate clusters in that case.
if createdClusters[uid] {
continue
}
chain, ok := cfgSnap.IngressGateway.DiscoveryChain[uid]
if !ok {
// this should not happen
return nil, fmt.Errorf("no discovery chain for upstream %q", uid)
}
upstreamClusters, err := s.makeUpstreamClustersForDiscoveryChain(
uid,
&u,
chain,
cfgSnap,
false,
)
if err != nil {
return nil, err
}
for _, c := range upstreamClusters {
s.configIngressUpstreamCluster(c, cfgSnap, listenerKey, &u)
clusters = append(clusters, c)
}
createdClusters[uid] = true
}
}
return clusters, nil
}
func (s *ResourceGenerator) configIngressUpstreamCluster(c *envoy_cluster_v3.Cluster, cfgSnap *proxycfg.ConfigSnapshot, listenerKey proxycfg.IngressListenerKey, u *structs.Upstream) {
var threshold *envoy_cluster_v3.CircuitBreakers_Thresholds
setThresholdLimit := func(limitType string, limit int) {
if limit <= 0 {
return
}
if threshold == nil {
threshold = &envoy_cluster_v3.CircuitBreakers_Thresholds{}
}
switch limitType {
case "max_connections":
threshold.MaxConnections = makeUint32Value(limit)
case "max_pending_requests":
threshold.MaxPendingRequests = makeUint32Value(limit)
case "max_requests":
threshold.MaxRequests = makeUint32Value(limit)
}
}
setThresholdLimit("max_connections", int(cfgSnap.IngressGateway.Defaults.MaxConnections))
setThresholdLimit("max_pending_requests", int(cfgSnap.IngressGateway.Defaults.MaxPendingRequests))
setThresholdLimit("max_requests", int(cfgSnap.IngressGateway.Defaults.MaxConcurrentRequests))
// Adjust the limit for upstream service
// Lookup listener and service config details from ingress gateway
// definition.
var svc *structs.IngressService
if lCfg, ok := cfgSnap.IngressGateway.Listeners[listenerKey]; ok {
svc = findIngressServiceMatchingUpstream(lCfg, *u)
}
if svc != nil {
setThresholdLimit("max_connections", int(svc.MaxConnections))
setThresholdLimit("max_pending_requests", int(svc.MaxPendingRequests))
setThresholdLimit("max_requests", int(svc.MaxConcurrentRequests))
}
if threshold != nil {
c.CircuitBreakers.Thresholds = []*envoy_cluster_v3.CircuitBreakers_Thresholds{threshold}
}
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) makeAppCluster(cfgSnap *proxycfg.ConfigSnapshot, name, pathProtocol string, port int) (*envoy_cluster_v3.Cluster, error) {
var c *envoy_cluster_v3.Cluster
var err error
cfg, err := ParseProxyConfig(cfgSnap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse Connect.Proxy.Config", "error", err)
}
// If we have overridden local cluster config try to parse it into an Envoy cluster
if cfg.LocalClusterJSON != "" {
return makeClusterFromUserConfig(cfg.LocalClusterJSON)
}
var endpoint *envoy_endpoint_v3.LbEndpoint
if cfgSnap.Proxy.LocalServiceSocketPath != "" {
endpoint = makePipeEndpoint(cfgSnap.Proxy.LocalServiceSocketPath)
} else {
addr := cfgSnap.Proxy.LocalServiceAddress
if addr == "" {
addr = "127.0.0.1"
}
endpoint = makeEndpoint(addr, port)
}
c = &envoy_cluster_v3.Cluster{
Name: name,
ConnectTimeout: durationpb.New(time.Duration(cfg.LocalConnectTimeoutMs) * time.Millisecond),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_STATIC},
LoadAssignment: &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{
{
LbEndpoints: []*envoy_endpoint_v3.LbEndpoint{
endpoint,
},
},
},
},
}
protocol := pathProtocol
if protocol == "" {
protocol = cfg.Protocol
}
if protocol == "http2" || protocol == "grpc" {
if err := s.setHttp2ProtocolOptions(c); err != nil {
return c, err
}
}
if cfg.MaxInboundConnections > 0 {
c.CircuitBreakers = &envoy_cluster_v3.CircuitBreakers{
Thresholds: []*envoy_cluster_v3.CircuitBreakers_Thresholds{
{
MaxConnections: makeUint32Value(cfg.MaxInboundConnections),
},
},
}
}
return c, err
}
func (s *ResourceGenerator) makeUpstreamClusterForPeerService(
uid proxycfg.UpstreamID,
upstreamConfig structs.UpstreamConfig,
peerMeta structs.PeeringServiceMeta,
cfgSnap *proxycfg.ConfigSnapshot,
) (*envoy_cluster_v3.Cluster, error) {
var (
c *envoy_cluster_v3.Cluster
err error
)
if upstreamConfig.EnvoyClusterJSON != "" {
c, err = makeClusterFromUserConfig(upstreamConfig.EnvoyClusterJSON)
if err != nil {
return c, err
}
// In the happy path don't return yet as we need to inject TLS config still.
}
upstreamsSnapshot, err := cfgSnap.ToConfigSnapshotUpstreams()
if err != nil {
return c, err
}
tbs, ok := upstreamsSnapshot.UpstreamPeerTrustBundles.Get(uid.Peer)
if !ok {
// this should never happen since we loop through upstreams with
// set trust bundles
return c, fmt.Errorf("trust bundle not ready for peer %s", uid.Peer)
}
clusterName := generatePeeredClusterName(uid, tbs)
outlierDetection := ToOutlierDetection(upstreamConfig.PassiveHealthCheck)
// We can't rely on health checks for services on cluster peers because they
// don't take into account service resolvers, splitters and routers. Setting
// MaxEjectionPercent too 100% gives outlier detection the power to eject the
// entire cluster.
outlierDetection.MaxEjectionPercent = &wrappers.UInt32Value{Value: 100}
s.Logger.Trace("generating cluster for", "cluster", clusterName)
if c == nil {
c = &envoy_cluster_v3.Cluster{
Name: clusterName,
ConnectTimeout: durationpb.New(time.Duration(upstreamConfig.ConnectTimeoutMs) * time.Millisecond),
CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{
HealthyPanicThreshold: &envoy_type_v3.Percent{
Value: 0, // disable panic threshold
},
},
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(upstreamConfig.Limits),
},
OutlierDetection: outlierDetection,
}
if upstreamConfig.Protocol == "http2" || upstreamConfig.Protocol == "grpc" {
if err := s.setHttp2ProtocolOptions(c); err != nil {
return c, err
}
}
useEDS := true
if _, ok := cfgSnap.ConnectProxy.PeerUpstreamEndpointsUseHostnames[uid]; ok {
// If we're using local mesh gw, the fact that upstreams use hostnames don't matter.
// If we're not using local mesh gw, then resort to CDS.
if upstreamConfig.MeshGateway.Mode != structs.MeshGatewayModeLocal {
useEDS = false
}
}
// If none of the service instances are addressed by a hostname we
// provide the endpoint IP addresses via EDS
if useEDS {
c.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}
c.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
}
} else {
2022-07-13 16:14:57 +00:00
ep, _ := cfgSnap.ConnectProxy.PeerUpstreamEndpoints.Get(uid)
configureClusterWithHostnames(
s.Logger,
c,
"", /*TODO:make configurable?*/
2022-07-13 16:14:57 +00:00
ep,
true, /*isRemote*/
false, /*onlyPassing*/
)
}
}
rootPEMs := cfgSnap.RootPEMs()
if uid.Peer != "" {
tbs, _ := upstreamsSnapshot.UpstreamPeerTrustBundles.Get(uid.Peer)
2022-07-13 16:14:57 +00:00
rootPEMs = tbs.ConcatenatedRootPEMs()
}
// Enable TLS upstream with the configured client certificate.
commonTLSContext := makeCommonTLSContext(
cfgSnap.Leaf(),
rootPEMs,
makeTLSParametersFromProxyTLSConfig(cfgSnap.MeshConfigTLSOutgoing()),
)
err = injectSANMatcher(commonTLSContext, peerMeta.SpiffeID...)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", clusterName, err)
}
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: peerMeta.PrimarySNI(),
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
return c, nil
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream, cfgSnap *proxycfg.ConfigSnapshot) (*envoy_cluster_v3.Cluster, error) {
var c *envoy_cluster_v3.Cluster
var err error
uid := proxycfg.NewUpstreamID(&upstream)
dc := upstream.Datacenter
if dc == "" {
dc = cfgSnap.Datacenter
}
sni := connect.UpstreamSNI(&upstream, "", dc, cfgSnap.Roots.TrustDomain)
cfg, err := structs.ParseUpstreamConfig(upstream.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse", "upstream", uid, "error", err)
}
if cfg.EnvoyClusterJSON != "" {
c, err = makeClusterFromUserConfig(cfg.EnvoyClusterJSON)
if err != nil {
return c, err
}
// In the happy path don't return yet as we need to inject TLS config still.
}
if c == nil {
c = &envoy_cluster_v3.Cluster{
Name: sni,
ConnectTimeout: durationpb.New(time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
},
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
},
OutlierDetection: ToOutlierDetection(cfg.PassiveHealthCheck),
}
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
if err := s.setHttp2ProtocolOptions(c); err != nil {
return c, err
}
}
}
endpoints := cfgSnap.ConnectProxy.PreparedQueryEndpoints[uid]
var (
spiffeIDs = make([]string, 0)
seen = make(map[string]struct{})
)
for _, e := range endpoints {
id := fmt.Sprintf("%s/%s", e.Node.Datacenter, e.Service.CompoundServiceName())
if _, ok := seen[id]; ok {
continue
}
seen[id] = struct{}{}
name := e.Service.Proxy.DestinationServiceName
if e.Service.Connect.Native {
name = e.Service.Service
}
spiffeIDs = append(spiffeIDs, connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: e.Service.NamespaceOrDefault(),
Partition: e.Service.PartitionOrDefault(),
Datacenter: e.Node.Datacenter,
Service: name,
}.URI().String())
}
// Enable TLS upstream with the configured client certificate.
commonTLSContext := makeCommonTLSContext(
cfgSnap.Leaf(),
cfgSnap.RootPEMs(),
makeTLSParametersFromProxyTLSConfig(cfgSnap.MeshConfigTLSOutgoing()),
)
err = injectSANMatcher(commonTLSContext, spiffeIDs...)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
return c, nil
}
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
uid proxycfg.UpstreamID,
2021-03-17 19:40:49 +00:00
upstream *structs.Upstream,
chain *structs.CompiledDiscoveryChain,
cfgSnap *proxycfg.ConfigSnapshot,
forMeshGateway bool,
) ([]*envoy_cluster_v3.Cluster, error) {
if chain == nil {
return nil, fmt.Errorf("cannot create upstream cluster without discovery chain for %s", uid)
}
if uid.Peer != "" && forMeshGateway {
return nil, fmt.Errorf("impossible to get a peer discovery chain in a mesh gateway")
}
upstreamConfigMap := make(map[string]interface{})
2021-03-17 19:40:49 +00:00
if upstream != nil {
upstreamConfigMap = upstream.Config
2021-03-17 19:40:49 +00:00
}
upstreamsSnapshot, err := cfgSnap.ToConfigSnapshotUpstreams()
// Mesh gateways are exempt because upstreamsSnapshot is only used for
// cluster peering targets and transative failover/redirects are unsupported.
if err != nil && !forMeshGateway {
return nil, err
}
rawUpstreamConfig, err := structs.ParseUpstreamConfigNoDefaults(upstreamConfigMap)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse", "upstream", uid,
"error", err)
}
finalizeUpstreamConfig := func(cfg structs.UpstreamConfig, connectTimeout time.Duration) structs.UpstreamConfig {
if cfg.Protocol == "" {
cfg.Protocol = chain.Protocol
}
if cfg.Protocol == "" {
cfg.Protocol = "tcp"
}
if cfg.ConnectTimeoutMs == 0 {
cfg.ConnectTimeoutMs = int(connectTimeout / time.Millisecond)
}
return cfg
}
var escapeHatchCluster *envoy_cluster_v3.Cluster
if !forMeshGateway {
if rawUpstreamConfig.EnvoyClusterJSON != "" {
if chain.Default {
// If you haven't done anything to setup the discovery chain, then
// you can use the envoy_cluster_json escape hatch.
escapeHatchCluster, err = makeClusterFromUserConfig(rawUpstreamConfig.EnvoyClusterJSON)
if err != nil {
return nil, err
}
} else {
s.Logger.Warn("ignoring escape hatch setting, because a discovery chain is configured for",
"discovery chain", chain.ServiceName, "upstream", uid,
"envoy_cluster_json", chain.ServiceName)
}
}
}
var out []*envoy_cluster_v3.Cluster
for _, node := range chain.Nodes {
switch {
case node == nil:
return nil, fmt.Errorf("impossible to process a nil node")
case node.Type != structs.DiscoveryGraphNodeTypeResolver:
continue
case node.Resolver == nil:
return nil, fmt.Errorf("impossible to process a non-resolver node")
}
connect: fix failover through a mesh gateway to a remote datacenter (#6259) Failover is pushed entirely down to the data plane by creating envoy clusters and putting each successive destination in a different load assignment priority band. For example this shows that normally requests go to 1.2.3.4:8080 but when that fails they go to 6.7.8.9:8080: - name: foo load_assignment: cluster_name: foo policy: overprovisioning_factor: 100000 endpoints: - priority: 0 lb_endpoints: - endpoint: address: socket_address: address: 1.2.3.4 port_value: 8080 - priority: 1 lb_endpoints: - endpoint: address: socket_address: address: 6.7.8.9 port_value: 8080 Mesh gateways route requests based solely on the SNI header tacked onto the TLS layer. Envoy currently only lets you configure the outbound SNI header at the cluster layer. If you try to failover through a mesh gateway you ideally would configure the SNI value per endpoint, but that's not possible in envoy today. This PR introduces a simpler way around the problem for now: 1. We identify any target of failover that will use mesh gateway mode local or remote and then further isolate any resolver node in the compiled discovery chain that has a failover destination set to one of those targets. 2. For each of these resolvers we will perform a small measurement of comparative healths of the endpoints that come back from the health API for the set of primary target and serial failover targets. We walk the list of targets in order and if any endpoint is healthy we return that target, otherwise we move on to the next target. 3. The CDS and EDS endpoints both perform the measurements in (2) for the affected resolver nodes. 4. For CDS this measurement selects which TLS SNI field to use for the cluster (note the cluster is always going to be named for the primary target) 5. For EDS this measurement selects which set of endpoints will populate the cluster. Priority tiered failover is ignored. One of the big downsides to this approach to failover is that the failover detection and correction is going to be controlled by consul rather than deferring that entirely to the data plane as with the prior version. This also means that we are bound to only failover using official health signals and cannot make use of data plane signals like outlier detection to affect failover. In this specific scenario the lack of data plane signals is ok because the effectiveness is already muted by the fact that the ultimate destination endpoints will have their data plane signals scrambled when they pass through the mesh gateway wrapper anyway so we're not losing much. Another related fix is that we now use the endpoint health from the underlying service, not the health of the gateway (regardless of failover mode).
2019-08-05 18:30:35 +00:00
failover := node.Resolver.Failover
// These variables are prefixed with primary to avoid shaddowing bugs.
primaryTargetID := node.Resolver.Target
primaryTarget := chain.Targets[primaryTargetID]
primaryTargetClusterData, ok := s.getTargetClusterData(upstreamsSnapshot, chain, primaryTargetID, forMeshGateway, false)
if !ok {
continue
}
upstreamConfig := finalizeUpstreamConfig(rawUpstreamConfig, node.Resolver.ConnectTimeout)
if forMeshGateway && !cfgSnap.Locality.Matches(primaryTarget.Datacenter, primaryTarget.Partition) {
s.Logger.Warn("ignoring discovery chain target that crosses a datacenter or partition boundary in a mesh gateway",
"target", primaryTarget,
"gatewayLocality", cfgSnap.Locality,
)
continue
}
// Construct the information required to make target clusters. When
// failover is configured, create the aggregate cluster.
var targetClustersData []targetClusterData
if failover != nil && !forMeshGateway {
var failoverClusterNames []string
for _, tid := range append([]string{primaryTargetID}, failover.Targets...) {
if td, ok := s.getTargetClusterData(upstreamsSnapshot, chain, tid, forMeshGateway, true); ok {
targetClustersData = append(targetClustersData, td)
failoverClusterNames = append(failoverClusterNames, td.clusterName)
}
}
aggregateClusterConfig, err := anypb.New(&envoy_aggregate_cluster_v3.ClusterConfig{
Clusters: failoverClusterNames,
})
if err != nil {
return nil, fmt.Errorf("failed to construct the aggregate cluster %q: %v", primaryTargetClusterData.clusterName, err)
}
connect: fix failover through a mesh gateway to a remote datacenter (#6259) Failover is pushed entirely down to the data plane by creating envoy clusters and putting each successive destination in a different load assignment priority band. For example this shows that normally requests go to 1.2.3.4:8080 but when that fails they go to 6.7.8.9:8080: - name: foo load_assignment: cluster_name: foo policy: overprovisioning_factor: 100000 endpoints: - priority: 0 lb_endpoints: - endpoint: address: socket_address: address: 1.2.3.4 port_value: 8080 - priority: 1 lb_endpoints: - endpoint: address: socket_address: address: 6.7.8.9 port_value: 8080 Mesh gateways route requests based solely on the SNI header tacked onto the TLS layer. Envoy currently only lets you configure the outbound SNI header at the cluster layer. If you try to failover through a mesh gateway you ideally would configure the SNI value per endpoint, but that's not possible in envoy today. This PR introduces a simpler way around the problem for now: 1. We identify any target of failover that will use mesh gateway mode local or remote and then further isolate any resolver node in the compiled discovery chain that has a failover destination set to one of those targets. 2. For each of these resolvers we will perform a small measurement of comparative healths of the endpoints that come back from the health API for the set of primary target and serial failover targets. We walk the list of targets in order and if any endpoint is healthy we return that target, otherwise we move on to the next target. 3. The CDS and EDS endpoints both perform the measurements in (2) for the affected resolver nodes. 4. For CDS this measurement selects which TLS SNI field to use for the cluster (note the cluster is always going to be named for the primary target) 5. For EDS this measurement selects which set of endpoints will populate the cluster. Priority tiered failover is ignored. One of the big downsides to this approach to failover is that the failover detection and correction is going to be controlled by consul rather than deferring that entirely to the data plane as with the prior version. This also means that we are bound to only failover using official health signals and cannot make use of data plane signals like outlier detection to affect failover. In this specific scenario the lack of data plane signals is ok because the effectiveness is already muted by the fact that the ultimate destination endpoints will have their data plane signals scrambled when they pass through the mesh gateway wrapper anyway so we're not losing much. Another related fix is that we now use the endpoint health from the underlying service, not the health of the gateway (regardless of failover mode).
2019-08-05 18:30:35 +00:00
c := &envoy_cluster_v3.Cluster{
Name: primaryTargetClusterData.clusterName,
AltStatName: primaryTargetClusterData.clusterName,
ConnectTimeout: durationpb.New(node.Resolver.ConnectTimeout),
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_ClusterType{
ClusterType: &envoy_cluster_v3.Cluster_CustomClusterType{
Name: "envoy.clusters.aggregate",
TypedConfig: aggregateClusterConfig,
},
},
}
out = append(out, c)
} else {
targetClustersData = append(targetClustersData, primaryTargetClusterData)
}
// Construct the target clusters.
for _, targetData := range targetClustersData {
target := chain.Targets[targetData.targetID]
sni := target.SNI
var additionalSpiffeIDs []string
targetSpiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: target.Namespace,
Partition: target.Partition,
Datacenter: target.Datacenter,
Service: target.Service,
}.URI().String()
targetUID := proxycfg.NewUpstreamIDFromTargetID(targetData.targetID)
if targetUID.Peer != "" {
peerMeta := upstreamsSnapshot.UpstreamPeerMeta(targetUID)
upstreamCluster, err := s.makeUpstreamClusterForPeerService(targetUID, upstreamConfig, peerMeta, cfgSnap)
if err != nil {
continue
}
// Override the cluster name to include the failover-target~ prefix.
upstreamCluster.Name = targetData.clusterName
out = append(out, upstreamCluster)
continue
connect: fix failover through a mesh gateway to a remote datacenter (#6259) Failover is pushed entirely down to the data plane by creating envoy clusters and putting each successive destination in a different load assignment priority band. For example this shows that normally requests go to 1.2.3.4:8080 but when that fails they go to 6.7.8.9:8080: - name: foo load_assignment: cluster_name: foo policy: overprovisioning_factor: 100000 endpoints: - priority: 0 lb_endpoints: - endpoint: address: socket_address: address: 1.2.3.4 port_value: 8080 - priority: 1 lb_endpoints: - endpoint: address: socket_address: address: 6.7.8.9 port_value: 8080 Mesh gateways route requests based solely on the SNI header tacked onto the TLS layer. Envoy currently only lets you configure the outbound SNI header at the cluster layer. If you try to failover through a mesh gateway you ideally would configure the SNI value per endpoint, but that's not possible in envoy today. This PR introduces a simpler way around the problem for now: 1. We identify any target of failover that will use mesh gateway mode local or remote and then further isolate any resolver node in the compiled discovery chain that has a failover destination set to one of those targets. 2. For each of these resolvers we will perform a small measurement of comparative healths of the endpoints that come back from the health API for the set of primary target and serial failover targets. We walk the list of targets in order and if any endpoint is healthy we return that target, otherwise we move on to the next target. 3. The CDS and EDS endpoints both perform the measurements in (2) for the affected resolver nodes. 4. For CDS this measurement selects which TLS SNI field to use for the cluster (note the cluster is always going to be named for the primary target) 5. For EDS this measurement selects which set of endpoints will populate the cluster. Priority tiered failover is ignored. One of the big downsides to this approach to failover is that the failover detection and correction is going to be controlled by consul rather than deferring that entirely to the data plane as with the prior version. This also means that we are bound to only failover using official health signals and cannot make use of data plane signals like outlier detection to affect failover. In this specific scenario the lack of data plane signals is ok because the effectiveness is already muted by the fact that the ultimate destination endpoints will have their data plane signals scrambled when they pass through the mesh gateway wrapper anyway so we're not losing much. Another related fix is that we now use the endpoint health from the underlying service, not the health of the gateway (regardless of failover mode).
2019-08-05 18:30:35 +00:00
}
s.Logger.Debug("generating cluster for", "cluster", targetData.clusterName)
c := &envoy_cluster_v3.Cluster{
Name: targetData.clusterName,
AltStatName: targetData.clusterName,
ConnectTimeout: durationpb.New(node.Resolver.ConnectTimeout),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{
HealthyPanicThreshold: &envoy_type_v3.Percent{
Value: 0, // disable panic threshold
},
},
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
},
// TODO(peering): make circuit breakers or outlier detection work?
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(upstreamConfig.Limits),
},
OutlierDetection: ToOutlierDetection(upstreamConfig.PassiveHealthCheck),
}
var lb *structs.LoadBalancer
if node.LoadBalancer != nil {
lb = node.LoadBalancer
}
if err := injectLBToCluster(lb, c); err != nil {
return nil, fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", targetData.clusterName, err)
}
2020-08-28 20:27:40 +00:00
if upstreamConfig.Protocol == "http2" || upstreamConfig.Protocol == "grpc" {
if err := s.setHttp2ProtocolOptions(c); err != nil {
return nil, err
}
}
configureTLS := true
if forMeshGateway {
// We only initiate TLS if we're doing an L7 proxy.
configureTLS = structs.IsProtocolHTTPLike(upstreamConfig.Protocol)
}
if configureTLS {
commonTLSContext := makeCommonTLSContext(
cfgSnap.Leaf(),
cfgSnap.RootPEMs(),
makeTLSParametersFromProxyTLSConfig(cfgSnap.MeshConfigTLSOutgoing()),
)
spiffeIDs := append([]string{targetSpiffeID}, additionalSpiffeIDs...)
sort.Strings(spiffeIDs)
err = injectSANMatcher(commonTLSContext, spiffeIDs...)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
}
out = append(out, c)
}
}
if escapeHatchCluster != nil {
if len(out) != 1 {
return nil, fmt.Errorf("cannot inject escape hatch cluster when discovery chain had no nodes")
}
defaultCluster := out[0]
// Overlay what the user provided.
escapeHatchCluster.TransportSocket = defaultCluster.TransportSocket
out = []*envoy_cluster_v3.Cluster{escapeHatchCluster}
}
return out, nil
}
func (s *ResourceGenerator) makeExportedUpstreamClustersForMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
// NOTE: Despite the mesh gateway already having one cluster per service
// (and subset) in the local datacenter we cannot reliably use those to
// send inbound peered traffic targeting a discovery chain.
//
// For starters, none of those add TLS so they'd be unusable for http-like
// L7 protocols.
//
// Additionally, those other clusters are all thin wrappers around simple
// catalog resolutions and are largely not impacted by various
// customizations related to a service-resolver, such as configuring the
// failover section.
//
// Instead we create brand new clusters solely to accept incoming peered
// traffic and give them a unique cluster prefix name to avoid collisions
// to keep the two use cases separate.
var clusters []proto.Message
createdExportedClusters := make(map[string]struct{}) // key=clusterName
for _, svc := range cfgSnap.MeshGatewayValidExportedServices() {
chain := cfgSnap.MeshGateway.DiscoveryChain[svc]
exportClusters, err := s.makeUpstreamClustersForDiscoveryChain(
proxycfg.NewUpstreamIDFromServiceName(svc),
nil,
chain,
cfgSnap,
true,
)
if err != nil {
return nil, err
}
for _, cluster := range exportClusters {
if _, ok := createdExportedClusters[cluster.Name]; ok {
continue
}
createdExportedClusters[cluster.Name] = struct{}{}
clusters = append(clusters, cluster)
}
}
return clusters, nil
}
// injectSANMatcher updates a TLS context so that it verifies the upstream SAN.
func injectSANMatcher(tlsContext *envoy_tls_v3.CommonTlsContext, matchStrings ...string) error {
validationCtx, ok := tlsContext.ValidationContextType.(*envoy_tls_v3.CommonTlsContext_ValidationContext)
if !ok {
return fmt.Errorf("invalid type: expected CommonTlsContext_ValidationContext, got %T",
tlsContext.ValidationContextType)
}
var matchers []*envoy_matcher_v3.StringMatcher
for _, m := range matchStrings {
matchers = append(matchers, &envoy_matcher_v3.StringMatcher{
MatchPattern: &envoy_matcher_v3.StringMatcher_Exact{
Exact: m,
},
})
}
validationCtx.ValidationContext.MatchSubjectAltNames = matchers
return nil
}
// makeClusterFromUserConfig returns the listener config decoded from an
// arbitrary proto3 json format string or an error if it's invalid.
//
// For now we only support embedding in JSON strings because of the hcl parsing
// pain (see Background section in the comment for decode.HookWeakDecodeFromSlice).
// This may be fixed in decode.HookWeakDecodeFromSlice in the future.
//
// When we do that we can support just nesting the config directly into the
// JSON/hcl naturally but this is a stop-gap that gets us an escape hatch
// immediately. It's also probably not a bad thing to support long-term since
// any config generated by other systems will likely be in canonical protobuf
// from rather than our slight variant in JSON/hcl.
func makeClusterFromUserConfig(configJSON string) (*envoy_cluster_v3.Cluster, error) {
// Type field is present so decode it as a types.Any
var any any.Any
err := jsonpb.UnmarshalString(configJSON, &any)
if err != nil {
return nil, err
}
// And then unmarshal the listener again...
var c envoy_cluster_v3.Cluster
err = proto.Unmarshal(any.Value, &c)
if err != nil {
return nil, err
}
return &c, err
}
type addressPair struct {
host string
port int
}
type clusterOpts struct {
// name for the cluster
name string
// isRemote determines whether the cluster is in a remote DC and we should prefer a WAN address
isRemote bool
// onlyPassing determines whether endpoints that do not have a passing status should be considered unhealthy
onlyPassing bool
// connectTimeout is the timeout for new network connections to hosts in the cluster
connectTimeout time.Duration
// hostnameEndpoints is a list of endpoints with a hostname as their address
hostnameEndpoints structs.CheckServiceNodes
// Corresponds to a valid address/port pairs to be routed externally
// these addresses will be embedded in the cluster configuration and will never use EDS
addresses []structs.ServiceAddress
}
// makeGatewayCluster creates an Envoy cluster for a mesh or terminating gateway
func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, opts clusterOpts) *envoy_cluster_v3.Cluster {
cfg, err := ParseGatewayConfig(snap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse gateway config", "error", err)
}
if opts.connectTimeout <= 0 {
opts.connectTimeout = time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond
}
cluster := &envoy_cluster_v3.Cluster{
Name: opts.name,
ConnectTimeout: durationpb.New(opts.connectTimeout),
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoy_cluster_v3.OutlierDetection{},
}
useEDS := true
if len(opts.hostnameEndpoints) > 0 {
useEDS = false
}
// TCP keepalive settings can be enabled for terminating gateway upstreams or remote mesh gateways.
remoteUpstream := opts.isRemote || snap.Kind == structs.ServiceKindTerminatingGateway
if remoteUpstream && cfg.TcpKeepaliveEnable {
cluster.UpstreamConnectionOptions = &envoy_cluster_v3.UpstreamConnectionOptions{
TcpKeepalive: &envoy_core_v3.TcpKeepalive{},
}
if cfg.TcpKeepaliveTime != 0 {
cluster.UpstreamConnectionOptions.TcpKeepalive.KeepaliveTime = makeUint32Value(cfg.TcpKeepaliveTime)
}
if cfg.TcpKeepaliveInterval != 0 {
cluster.UpstreamConnectionOptions.TcpKeepalive.KeepaliveInterval = makeUint32Value(cfg.TcpKeepaliveInterval)
}
if cfg.TcpKeepaliveProbes != 0 {
cluster.UpstreamConnectionOptions.TcpKeepalive.KeepaliveProbes = makeUint32Value(cfg.TcpKeepaliveProbes)
}
}
// If none of the service instances are addressed by a hostname we provide the endpoint IP addresses via EDS
if useEDS {
cluster.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}
cluster.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
}
} else {
configureClusterWithHostnames(
s.Logger,
cluster,
cfg.DNSDiscoveryType,
opts.hostnameEndpoints,
opts.isRemote,
opts.onlyPassing,
)
}
return cluster
}
func configureClusterWithHostnames(
logger hclog.Logger,
cluster *envoy_cluster_v3.Cluster,
dnsDiscoveryType string,
// hostnameEndpoints is a list of endpoints with a hostname as their address
hostnameEndpoints structs.CheckServiceNodes,
// isRemote determines whether the cluster is in a remote DC or partition and we should prefer a WAN address
isRemote bool,
// onlyPassing determines whether endpoints that do not have a passing status should be considered unhealthy
onlyPassing bool,
) {
// When a service instance is addressed by a hostname we have Envoy do the DNS resolution
// by setting a DNS cluster type and passing the hostname endpoints via CDS.
rate := 10 * time.Second
cluster.DnsRefreshRate = durationpb.New(rate)
cluster.DnsLookupFamily = envoy_cluster_v3.Cluster_V4_ONLY
discoveryType := envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_LOGICAL_DNS}
if dnsDiscoveryType == "strict_dns" {
discoveryType.Type = envoy_cluster_v3.Cluster_STRICT_DNS
}
cluster.ClusterDiscoveryType = &discoveryType
endpoints := make([]*envoy_endpoint_v3.LbEndpoint, 0, 1)
uniqueHostnames := make(map[string]bool)
var (
hostname string
idx int
fallback *envoy_endpoint_v3.LbEndpoint
)
for i, e := range hostnameEndpoints {
_, addr, port := e.BestAddress(isRemote)
uniqueHostnames[addr] = true
health, weight := calculateEndpointHealthAndWeight(e, onlyPassing)
if health == envoy_core_v3.HealthStatus_UNHEALTHY {
fallback = makeLbEndpoint(addr, port, health, weight)
continue
}
if len(endpoints) == 0 {
endpoints = append(endpoints, makeLbEndpoint(addr, port, health, weight))
hostname = addr
idx = i
break
}
}
dc := hostnameEndpoints[idx].Node.Datacenter
service := hostnameEndpoints[idx].Service.CompoundServiceName()
// Fall back to last unhealthy endpoint if none were healthy
if len(endpoints) == 0 {
logger.Warn("upstream service does not contain any healthy instances",
"dc", dc, "service", service.String())
endpoints = append(endpoints, fallback)
}
if len(uniqueHostnames) > 1 {
logger.Warn(fmt.Sprintf("service contains instances with more than one unique hostname; only %q be resolved by Envoy", hostname),
Support Incremental xDS mode (#9855) This adds support for the Incremental xDS protocol when using xDS v3. This is best reviewed commit-by-commit and will not be squashed when merged. Union of all commit messages follows to give an overarching summary: xds: exclusively support incremental xDS when using xDS v3 Attempts to use SoTW via v3 will fail, much like attempts to use incremental via v2 will fail. Work around a strange older envoy behavior involving empty CDS responses over incremental xDS. xds: various cleanups and refactors that don't strictly concern the addition of incremental xDS support Dissolve the connectionInfo struct in favor of per-connection ResourceGenerators instead. Do a better job of ensuring the xds code uses a well configured logger that accurately describes the connected client. xds: pull out checkStreamACLs method in advance of a later commit xds: rewrite SoTW xDS protocol tests to use protobufs rather than hand-rolled json strings In the test we very lightly reuse some of the more boring protobuf construction helper code that is also technically under test. The important thing of the protocol tests is testing the protocol. The actual inputs and outputs are largely already handled by the xds golden output tests now so these protocol tests don't have to do double-duty. This also updates the SoTW protocol test to exclusively use xDS v2 which is the only variant of SoTW that will be supported in Consul 1.10. xds: default xds.Server.AuthCheckFrequency at use-time instead of construction-time
2021-04-29 18:54:05 +00:00
"dc", dc, "service", service.String())
}
cluster.LoadAssignment = &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: cluster.Name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{
{
LbEndpoints: endpoints,
},
},
}
}
// makeExternalIPCluster creates an Envoy cluster for routing to IP addresses outside of Consul
// This is used by terminating gateways for Destinations
func (s *ResourceGenerator) makeExternalIPCluster(snap *proxycfg.ConfigSnapshot, opts clusterOpts) *envoy_cluster_v3.Cluster {
cfg, err := ParseGatewayConfig(snap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse gateway config", "error", err)
}
if opts.connectTimeout <= 0 {
opts.connectTimeout = time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond
}
cluster := &envoy_cluster_v3.Cluster{
Name: opts.name,
ConnectTimeout: durationpb.New(opts.connectTimeout),
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoy_cluster_v3.OutlierDetection{},
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_STATIC},
}
endpoints := make([]*envoy_endpoint_v3.LbEndpoint, 0, len(opts.addresses))
for _, pair := range opts.addresses {
endpoints = append(endpoints, makeEndpoint(pair.Address, pair.Port))
}
cluster.LoadAssignment = &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: cluster.Name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{
{
LbEndpoints: endpoints,
},
},
}
return cluster
}
// makeExternalHostnameCluster creates an Envoy cluster for hostname endpoints that will be resolved with DNS
// This is used by both terminating gateways for Destinations, and Mesh Gateways for peering control plane traffice
func (s *ResourceGenerator) makeExternalHostnameCluster(snap *proxycfg.ConfigSnapshot, opts clusterOpts) *envoy_cluster_v3.Cluster {
cfg, err := ParseGatewayConfig(snap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse gateway config", "error", err)
}
opts.connectTimeout = time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond
cluster := &envoy_cluster_v3.Cluster{
Name: opts.name,
ConnectTimeout: durationpb.New(opts.connectTimeout),
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoy_cluster_v3.OutlierDetection{},
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_LOGICAL_DNS},
2022-07-25 22:18:00 +00:00
DnsLookupFamily: envoy_cluster_v3.Cluster_V4_ONLY,
}
rate := 10 * time.Second
cluster.DnsRefreshRate = durationpb.New(rate)
endpoints := make([]*envoy_endpoint_v3.LbEndpoint, 0, len(opts.addresses))
for _, pair := range opts.addresses {
address := makeAddress(pair.Address, pair.Port)
endpoint := &envoy_endpoint_v3.LbEndpoint{
HostIdentifier: &envoy_endpoint_v3.LbEndpoint_Endpoint{
Endpoint: &envoy_endpoint_v3.Endpoint{
Address: address,
},
},
}
endpoints = append(endpoints, endpoint)
}
cluster.LoadAssignment = &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: cluster.Name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{{
LbEndpoints: endpoints,
}},
}
return cluster
}
func makeThresholdsIfNeeded(limits *structs.UpstreamLimits) []*envoy_cluster_v3.CircuitBreakers_Thresholds {
if limits == nil {
return nil
}
threshold := &envoy_cluster_v3.CircuitBreakers_Thresholds{}
// Likewise, make sure to not set any threshold values on the zero-value in
// order to rely on Envoy defaults
if limits.MaxConnections != nil {
threshold.MaxConnections = makeUint32Value(*limits.MaxConnections)
}
if limits.MaxPendingRequests != nil {
threshold.MaxPendingRequests = makeUint32Value(*limits.MaxPendingRequests)
}
if limits.MaxConcurrentRequests != nil {
threshold.MaxRequests = makeUint32Value(*limits.MaxConcurrentRequests)
}
return []*envoy_cluster_v3.CircuitBreakers_Thresholds{threshold}
}
func makeLbEndpoint(addr string, port int, health envoy_core_v3.HealthStatus, weight int) *envoy_endpoint_v3.LbEndpoint {
return &envoy_endpoint_v3.LbEndpoint{
HostIdentifier: &envoy_endpoint_v3.LbEndpoint_Endpoint{
Endpoint: &envoy_endpoint_v3.Endpoint{
Address: &envoy_core_v3.Address{
Address: &envoy_core_v3.Address_SocketAddress{
SocketAddress: &envoy_core_v3.SocketAddress{
Address: addr,
PortSpecifier: &envoy_core_v3.SocketAddress_PortValue{
PortValue: uint32(port),
},
},
},
},
},
},
HealthStatus: health,
LoadBalancingWeight: makeUint32Value(weight),
}
}
func injectLBToCluster(ec *structs.LoadBalancer, c *envoy_cluster_v3.Cluster) error {
if ec == nil {
return nil
}
switch ec.Policy {
case "":
return nil
case structs.LBPolicyLeastRequest:
c.LbPolicy = envoy_cluster_v3.Cluster_LEAST_REQUEST
if ec.LeastRequestConfig != nil {
c.LbConfig = &envoy_cluster_v3.Cluster_LeastRequestLbConfig_{
LeastRequestLbConfig: &envoy_cluster_v3.Cluster_LeastRequestLbConfig{
ChoiceCount: &wrappers.UInt32Value{Value: ec.LeastRequestConfig.ChoiceCount},
},
}
}
case structs.LBPolicyRoundRobin:
c.LbPolicy = envoy_cluster_v3.Cluster_ROUND_ROBIN
case structs.LBPolicyRandom:
c.LbPolicy = envoy_cluster_v3.Cluster_RANDOM
case structs.LBPolicyRingHash:
c.LbPolicy = envoy_cluster_v3.Cluster_RING_HASH
if ec.RingHashConfig != nil {
c.LbConfig = &envoy_cluster_v3.Cluster_RingHashLbConfig_{
RingHashLbConfig: &envoy_cluster_v3.Cluster_RingHashLbConfig{
MinimumRingSize: &wrappers.UInt64Value{Value: ec.RingHashConfig.MinimumRingSize},
MaximumRingSize: &wrappers.UInt64Value{Value: ec.RingHashConfig.MaximumRingSize},
},
}
}
case structs.LBPolicyMaglev:
c.LbPolicy = envoy_cluster_v3.Cluster_MAGLEV
default:
return fmt.Errorf("unsupported load balancer policy %q for cluster %q", ec.Policy, c.Name)
}
return nil
}
func (s *ResourceGenerator) setHttp2ProtocolOptions(c *envoy_cluster_v3.Cluster) error {
cfg := &envoy_upstreams_v3.HttpProtocolOptions{
UpstreamProtocolOptions: &envoy_upstreams_v3.HttpProtocolOptions_ExplicitHttpConfig_{
ExplicitHttpConfig: &envoy_upstreams_v3.HttpProtocolOptions_ExplicitHttpConfig{
ProtocolConfig: &envoy_upstreams_v3.HttpProtocolOptions_ExplicitHttpConfig_Http2ProtocolOptions{
Http2ProtocolOptions: &envoy_core_v3.Http2ProtocolOptions{},
},
},
},
}
any, err := anypb.New(cfg)
if err != nil {
return err
}
c.TypedExtensionProtocolOptions = map[string]*anypb.Any{
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": any,
}
return nil
}
// generatePeeredClusterName returns an SNI-like cluster name which mimics PeeredServiceSNI
// but excludes partition information which could be ambiguous (local vs remote partition).
func generatePeeredClusterName(uid proxycfg.UpstreamID, tb *pbpeering.PeeringTrustBundle) string {
return strings.Join([]string{
uid.Name,
uid.NamespaceOrDefault(),
uid.Peer,
"external",
tb.TrustDomain,
}, ".")
}
type targetClusterData struct {
targetID string
clusterName string
}
func (s *ResourceGenerator) getTargetClusterData(upstreamsSnapshot *proxycfg.ConfigSnapshotUpstreams, chain *structs.CompiledDiscoveryChain, tid string, forMeshGateway bool, failover bool) (targetClusterData, bool) {
target := chain.Targets[tid]
clusterName := target.Name
targetUID := proxycfg.NewUpstreamIDFromTargetID(tid)
if targetUID.Peer != "" {
tbs, ok := upstreamsSnapshot.UpstreamPeerTrustBundles.Get(targetUID.Peer)
// We can't generate cluster on peers without the trust bundle. The
// trust bundle should be ready soon.
if !ok {
s.Logger.Debug("peer trust bundle not ready for discovery chain target",
"peer", targetUID.Peer,
"target", tid,
)
return targetClusterData{}, false
}
clusterName = generatePeeredClusterName(targetUID, tbs)
}
clusterName = CustomizeClusterName(clusterName, chain)
if failover {
clusterName = failoverClusterNamePrefix + clusterName
}
if forMeshGateway {
clusterName = meshGatewayExportedClusterNamePrefix + clusterName
}
return targetClusterData{
targetID: tid,
clusterName: clusterName,
}, true
}