open-consul/agent/xds/clusters.go
freddygv 7fba7456ec Fix race of upstreams with same passthrough ip
Due to timing, a transparent proxy could have two upstreams to dial
directly with the same address.

For example:
- The orders service can dial upstreams shipping and payment directly.
- An instance of shipping at address 10.0.0.1 is deregistered.
- Payments is scaled up and scheduled to have address 10.0.0.1.
- The orders service receives the event for the new payments instance
before seeing the deregistration for the shipping instance. At this
point two upstreams have the same passthrough address and Envoy will
reject the listener configuration.

To disambiguate this commit considers the Raft index when storing
passthrough addresses. In the example above, 10.0.0.1 would only be
associated with the newer payments service instance.
2022-02-10 17:01:57 -07:00

1040 lines
34 KiB
Go

package xds
import (
"errors"
"fmt"
"sort"
"time"
envoy_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
envoy_core_v3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
envoy_endpoint_v3 "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
envoy_tls_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3"
envoy_matcher_v3 "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3"
envoy_type_v3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/golang/protobuf/jsonpb"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes"
"github.com/golang/protobuf/ptypes/any"
"github.com/golang/protobuf/ptypes/wrappers"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/proxycfg"
"github.com/hashicorp/consul/agent/structs"
)
// clustersFromSnapshot returns the xDS API representation of the "clusters" in the snapshot.
func (s *ResourceGenerator) clustersFromSnapshot(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
if cfgSnap == nil {
return nil, errors.New("nil config given")
}
switch cfgSnap.Kind {
case structs.ServiceKindConnectProxy:
return s.clustersFromSnapshotConnectProxy(cfgSnap)
case structs.ServiceKindTerminatingGateway:
res, err := s.makeGatewayServiceClusters(cfgSnap, cfgSnap.TerminatingGateway.ServiceGroups, cfgSnap.TerminatingGateway.ServiceResolvers)
if err != nil {
return nil, err
}
return res, nil
case structs.ServiceKindMeshGateway:
res, err := s.clustersFromSnapshotMeshGateway(cfgSnap)
if err != nil {
return nil, err
}
return res, nil
case structs.ServiceKindIngressGateway:
res, err := s.clustersFromSnapshotIngressGateway(cfgSnap)
if err != nil {
return nil, err
}
return res, nil
default:
return nil, fmt.Errorf("Invalid service kind: %v", cfgSnap.Kind)
}
}
// clustersFromSnapshot returns the xDS API representation of the "clusters"
// (upstreams) in the snapshot.
func (s *ResourceGenerator) clustersFromSnapshotConnectProxy(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
// This sizing is a lower bound.
clusters := make([]proto.Message, 0, len(cfgSnap.ConnectProxy.DiscoveryChain)+1)
// Include the "app" cluster for the public listener
appCluster, err := s.makeAppCluster(cfgSnap, LocalAppClusterName, "", cfgSnap.Proxy.LocalServicePort)
if err != nil {
return nil, err
}
clusters = append(clusters, appCluster)
if cfgSnap.Proxy.Mode == structs.ProxyModeTransparent {
passthroughs, err := makePassthroughClusters(cfgSnap)
if err != nil {
return nil, fmt.Errorf("failed to make passthrough clusters for transparent proxy: %v", err)
}
clusters = append(clusters, passthroughs...)
}
for uid, chain := range cfgSnap.ConnectProxy.DiscoveryChain {
upstreamCfg := cfgSnap.ConnectProxy.UpstreamConfig[uid]
explicit := upstreamCfg.HasLocalPortOrSocket()
if _, implicit := cfgSnap.ConnectProxy.IntentionUpstreams[uid]; !implicit && !explicit {
// Discovery chain is not associated with a known explicit or implicit upstream so it is skipped.
continue
}
chainEndpoints, ok := cfgSnap.ConnectProxy.WatchedUpstreamEndpoints[uid]
if !ok {
// this should not happen
return nil, fmt.Errorf("no endpoint map for upstream %q", uid)
}
upstreamClusters, err := s.makeUpstreamClustersForDiscoveryChain(uid, upstreamCfg, chain, chainEndpoints, cfgSnap)
if err != nil {
return nil, err
}
for _, cluster := range upstreamClusters {
clusters = append(clusters, cluster)
}
}
for _, u := range cfgSnap.Proxy.Upstreams {
if u.DestinationType != structs.UpstreamDestTypePreparedQuery {
continue
}
upstreamCluster, err := s.makeUpstreamClusterForPreparedQuery(u, cfgSnap)
if err != nil {
return nil, err
}
clusters = append(clusters, upstreamCluster)
}
cfgSnap.Proxy.Expose.Finalize()
paths := cfgSnap.Proxy.Expose.Paths
// Add service health checks to the list of paths to create clusters for if needed
if cfgSnap.Proxy.Expose.Checks {
psid := structs.NewServiceID(cfgSnap.Proxy.DestinationServiceID, &cfgSnap.ProxyID.EnterpriseMeta)
for _, check := range s.CheckFetcher.ServiceHTTPBasedChecks(psid) {
p, err := parseCheckPath(check)
if err != nil {
s.Logger.Warn("failed to create cluster for", "check", check.CheckID, "error", err)
continue
}
paths = append(paths, p)
}
}
// Create a new cluster if we need to expose a port that is different from the service port
for _, path := range paths {
if path.LocalPathPort == cfgSnap.Proxy.LocalServicePort {
continue
}
c, err := s.makeAppCluster(cfgSnap, makeExposeClusterName(path.LocalPathPort), path.Protocol, path.LocalPathPort)
if err != nil {
s.Logger.Warn("failed to make local cluster", "path", path.Path, "error", err)
continue
}
clusters = append(clusters, c)
}
return clusters, nil
}
func makeExposeClusterName(destinationPort int) string {
return fmt.Sprintf("exposed_cluster_%d", destinationPort)
}
// In transparent proxy mode there are potentially multiple passthrough clusters added.
// The first is for destinations outside of Consul's catalog. This is for a plain TCP proxy.
// All of these use Envoy's ORIGINAL_DST listener filter, which forwards to the original
// destination address (before the iptables redirection).
// The rest are for destinations inside the mesh, which require certificates for mTLS.
func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
// This size is an upper bound.
clusters := make([]proto.Message, 0, len(cfgSnap.ConnectProxy.PassthroughUpstreams)+1)
if cfgSnap.ConnectProxy.MeshConfig == nil ||
!cfgSnap.ConnectProxy.MeshConfig.TransparentProxy.MeshDestinationsOnly {
clusters = append(clusters, &envoy_cluster_v3.Cluster{
Name: OriginalDestinationClusterName,
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{
Type: envoy_cluster_v3.Cluster_ORIGINAL_DST,
},
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
ConnectTimeout: ptypes.DurationProto(5 * time.Second),
})
}
for _, target := range cfgSnap.ConnectProxy.PassthroughUpstreams {
for tid := range target {
uid := proxycfg.NewUpstreamIDFromTargetID(tid)
sni := connect.ServiceSNI(
uid.Name, "", uid.NamespaceOrDefault(), uid.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
// Prefixed with passthrough to distinguish from non-passthrough clusters for the same upstream.
name := "passthrough~" + sni
c := envoy_cluster_v3.Cluster{
Name: name,
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{
Type: envoy_cluster_v3.Cluster_ORIGINAL_DST,
},
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
// TODO(tproxy) This should use the connection timeout configured on the upstream's config entry
ConnectTimeout: ptypes.DurationProto(5 * time.Second),
}
spiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Partition: uid.PartitionOrDefault(),
Namespace: uid.NamespaceOrDefault(),
Datacenter: cfgSnap.Datacenter,
Service: uid.Name,
}
commonTLSContext := makeCommonTLSContextFromLeafWithoutParams(cfgSnap, cfgSnap.Leaf())
err := injectSANMatcher(commonTLSContext, spiffeID)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(&tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
clusters = append(clusters, &c)
}
}
return clusters, nil
}
// clustersFromSnapshotMeshGateway returns the xDS API representation of the "clusters"
// for a mesh gateway. This will include 1 cluster per remote datacenter as well as
// 1 cluster for each service subset.
func (s *ResourceGenerator) clustersFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
keys := cfgSnap.MeshGateway.GatewayKeys()
// 1 cluster per remote dc/partition + 1 cluster per local service (this is a lower bound - all subset specific clusters will be appended)
clusters := make([]proto.Message, 0, len(keys)+len(cfgSnap.MeshGateway.ServiceGroups))
// Generate the remote clusters
for _, key := range keys {
if key.Matches(cfgSnap.Datacenter, cfgSnap.ProxyID.PartitionOrDefault()) {
continue // skip local
}
opts := gatewayClusterOpts{
name: connect.GatewaySNI(key.Datacenter, key.Partition, cfgSnap.Roots.TrustDomain),
hostnameEndpoints: cfgSnap.MeshGateway.HostnameDatacenters[key.String()],
isRemote: true,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
clusters = append(clusters, cluster)
}
if cfgSnap.ProxyID.InDefaultPartition() &&
cfgSnap.ServiceMeta[structs.MetaWANFederationKey] == "1" &&
cfgSnap.ServerSNIFn != nil {
// Add all of the remote wildcard datacenter mappings for servers.
for _, key := range keys {
hostnameEndpoints := cfgSnap.MeshGateway.HostnameDatacenters[key.String()]
// If the DC is our current DC then this cluster is for traffic from a remote DC to a local server.
// HostnameDatacenters is populated with gateway addresses, so it does not apply here.
if key.Datacenter == cfgSnap.Datacenter {
hostnameEndpoints = nil
}
opts := gatewayClusterOpts{
name: cfgSnap.ServerSNIFn(key.Datacenter, ""),
hostnameEndpoints: hostnameEndpoints,
isRemote: !key.Matches(cfgSnap.Datacenter, cfgSnap.ProxyID.PartitionOrDefault()),
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
clusters = append(clusters, cluster)
}
// And for the current datacenter, send all flavors appropriately.
for _, srv := range cfgSnap.MeshGateway.ConsulServers {
opts := gatewayClusterOpts{
name: cfgSnap.ServerSNIFn(cfgSnap.Datacenter, srv.Node.Node),
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
clusters = append(clusters, cluster)
}
}
// generate the per-service/subset clusters
c, err := s.makeGatewayServiceClusters(cfgSnap, cfgSnap.MeshGateway.ServiceGroups, cfgSnap.MeshGateway.ServiceResolvers)
if err != nil {
return nil, err
}
clusters = append(clusters, c...)
return clusters, nil
}
func (s *ResourceGenerator) makeGatewayServiceClusters(
cfgSnap *proxycfg.ConfigSnapshot,
services map[structs.ServiceName]structs.CheckServiceNodes,
resolvers map[structs.ServiceName]*structs.ServiceResolverConfigEntry,
) ([]proto.Message, error) {
var hostnameEndpoints structs.CheckServiceNodes
switch cfgSnap.Kind {
case structs.ServiceKindTerminatingGateway, structs.ServiceKindMeshGateway:
default:
return nil, fmt.Errorf("unsupported gateway kind %q", cfgSnap.Kind)
}
clusters := make([]proto.Message, 0, len(services))
for svc := range services {
clusterName := connect.ServiceSNI(svc.Name, "", svc.NamespaceOrDefault(), svc.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
resolver, hasResolver := resolvers[svc]
var loadBalancer *structs.LoadBalancer
if !hasResolver {
// Use a zero value resolver with no timeout and no subsets
resolver = &structs.ServiceResolverConfigEntry{}
}
if resolver.LoadBalancer != nil {
loadBalancer = resolver.LoadBalancer
}
// When making service clusters we only pass endpoints with hostnames if the kind is a terminating gateway
// This is because the services a mesh gateway will route to are not external services and are not addressed by a hostname.
if cfgSnap.Kind == structs.ServiceKindTerminatingGateway {
hostnameEndpoints = cfgSnap.TerminatingGateway.HostnameServices[svc]
}
var isRemote bool
if len(services[svc]) > 0 {
isRemote = !cfgSnap.Locality.Matches(services[svc][0].Node.Datacenter, services[svc][0].Node.PartitionOrDefault())
}
opts := gatewayClusterOpts{
name: clusterName,
hostnameEndpoints: hostnameEndpoints,
connectTimeout: resolver.ConnectTimeout,
isRemote: isRemote,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
if err := s.injectGatewayServiceAddons(cfgSnap, cluster, svc, loadBalancer); err != nil {
return nil, err
}
clusters = append(clusters, cluster)
// If there is a service-resolver for this service then also setup a cluster for each subset
for name, subset := range resolver.Subsets {
subsetHostnameEndpoints, err := s.filterSubsetEndpoints(&subset, hostnameEndpoints)
if err != nil {
return nil, err
}
opts := gatewayClusterOpts{
name: connect.ServiceSNI(svc.Name, name, svc.NamespaceOrDefault(), svc.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain),
hostnameEndpoints: subsetHostnameEndpoints,
onlyPassing: subset.OnlyPassing,
connectTimeout: resolver.ConnectTimeout,
isRemote: isRemote,
}
cluster := s.makeGatewayCluster(cfgSnap, opts)
if err := s.injectGatewayServiceAddons(cfgSnap, cluster, svc, loadBalancer); err != nil {
return nil, err
}
clusters = append(clusters, cluster)
}
}
return clusters, nil
}
func (s *ResourceGenerator) injectGatewayServiceAddons(cfgSnap *proxycfg.ConfigSnapshot, c *envoy_cluster_v3.Cluster, svc structs.ServiceName, lb *structs.LoadBalancer) error {
switch cfgSnap.Kind {
case structs.ServiceKindMeshGateway:
// We can't apply hash based LB config to mesh gateways because they rely on inspecting HTTP attributes
// and mesh gateways do not decrypt traffic
if !lb.IsHashBased() {
if err := injectLBToCluster(lb, c); err != nil {
return fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", c.Name, err)
}
}
case structs.ServiceKindTerminatingGateway:
// Context used for TLS origination to the cluster
if mapping, ok := cfgSnap.TerminatingGateway.GatewayServices[svc]; ok && mapping.CAFile != "" {
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: makeCommonTLSContextFromFiles(mapping.CAFile, mapping.CertFile, mapping.KeyFile),
}
if mapping.SNI != "" {
tlsContext.Sni = mapping.SNI
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return err
}
c.TransportSocket = transportSocket
}
if err := injectLBToCluster(lb, c); err != nil {
return fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", c.Name, err)
}
}
return nil
}
func (s *ResourceGenerator) clustersFromSnapshotIngressGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
var clusters []proto.Message
createdClusters := make(map[proxycfg.UpstreamID]bool)
for _, upstreams := range cfgSnap.IngressGateway.Upstreams {
for _, u := range upstreams {
uid := proxycfg.NewUpstreamID(&u)
// If we've already created a cluster for this upstream, skip it. Multiple listeners may
// reference the same upstream, so we don't need to create duplicate clusters in that case.
if createdClusters[uid] {
continue
}
chain, ok := cfgSnap.IngressGateway.DiscoveryChain[uid]
if !ok {
// this should not happen
return nil, fmt.Errorf("no discovery chain for upstream %q", uid)
}
chainEndpoints, ok := cfgSnap.IngressGateway.WatchedUpstreamEndpoints[uid]
if !ok {
// this should not happen
return nil, fmt.Errorf("no endpoint map for upstream %q", uid)
}
upstreamClusters, err := s.makeUpstreamClustersForDiscoveryChain(uid, &u, chain, chainEndpoints, cfgSnap)
if err != nil {
return nil, err
}
for _, c := range upstreamClusters {
clusters = append(clusters, c)
}
createdClusters[uid] = true
}
}
return clusters, nil
}
func (s *ResourceGenerator) makeAppCluster(cfgSnap *proxycfg.ConfigSnapshot, name, pathProtocol string, port int) (*envoy_cluster_v3.Cluster, error) {
var c *envoy_cluster_v3.Cluster
var err error
cfg, err := ParseProxyConfig(cfgSnap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse Connect.Proxy.Config", "error", err)
}
// If we have overridden local cluster config try to parse it into an Envoy cluster
if cfg.LocalClusterJSON != "" {
return makeClusterFromUserConfig(cfg.LocalClusterJSON)
}
var endpoint *envoy_endpoint_v3.LbEndpoint
if cfgSnap.Proxy.LocalServiceSocketPath != "" {
endpoint = makePipeEndpoint(cfgSnap.Proxy.LocalServiceSocketPath)
} else {
addr := cfgSnap.Proxy.LocalServiceAddress
if addr == "" {
addr = "127.0.0.1"
}
endpoint = makeEndpoint(addr, port)
}
c = &envoy_cluster_v3.Cluster{
Name: name,
ConnectTimeout: ptypes.DurationProto(time.Duration(cfg.LocalConnectTimeoutMs) * time.Millisecond),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_STATIC},
LoadAssignment: &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{
{
LbEndpoints: []*envoy_endpoint_v3.LbEndpoint{
endpoint,
},
},
},
},
}
protocol := pathProtocol
if protocol == "" {
protocol = cfg.Protocol
}
if protocol == "http2" || protocol == "grpc" {
c.Http2ProtocolOptions = &envoy_core_v3.Http2ProtocolOptions{}
}
return c, err
}
func (s *ResourceGenerator) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream, cfgSnap *proxycfg.ConfigSnapshot) (*envoy_cluster_v3.Cluster, error) {
var c *envoy_cluster_v3.Cluster
var err error
uid := proxycfg.NewUpstreamID(&upstream)
dc := upstream.Datacenter
if dc == "" {
dc = cfgSnap.Datacenter
}
sni := connect.UpstreamSNI(&upstream, "", dc, cfgSnap.Roots.TrustDomain)
cfg, err := structs.ParseUpstreamConfig(upstream.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse", "upstream", uid, "error", err)
}
if cfg.EnvoyClusterJSON != "" {
c, err = makeClusterFromUserConfig(cfg.EnvoyClusterJSON)
if err != nil {
return c, err
}
// In the happy path don't return yet as we need to inject TLS config still.
}
if c == nil {
c = &envoy_cluster_v3.Cluster{
Name: sni,
ConnectTimeout: ptypes.DurationProto(time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
},
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
},
OutlierDetection: ToOutlierDetection(cfg.PassiveHealthCheck),
}
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
c.Http2ProtocolOptions = &envoy_core_v3.Http2ProtocolOptions{}
}
}
endpoints := cfgSnap.ConnectProxy.PreparedQueryEndpoints[uid]
var (
spiffeIDs = make([]connect.SpiffeIDService, 0)
seen = make(map[string]struct{})
)
for _, e := range endpoints {
id := fmt.Sprintf("%s/%s", e.Node.Datacenter, e.Service.CompoundServiceName())
if _, ok := seen[id]; ok {
continue
}
seen[id] = struct{}{}
name := e.Service.Proxy.DestinationServiceName
if e.Service.Connect.Native {
name = e.Service.Service
}
spiffeIDs = append(spiffeIDs, connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: e.Service.NamespaceOrDefault(),
Partition: e.Service.PartitionOrDefault(),
Datacenter: e.Node.Datacenter,
Service: name,
})
}
// Enable TLS upstream with the configured client certificate.
commonTLSContext := makeCommonTLSContextFromLeafWithoutParams(cfgSnap, cfgSnap.Leaf())
err = injectSANMatcher(commonTLSContext, spiffeIDs...)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
return c, nil
}
func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
uid proxycfg.UpstreamID,
upstream *structs.Upstream,
chain *structs.CompiledDiscoveryChain,
chainEndpoints map[string]structs.CheckServiceNodes,
cfgSnap *proxycfg.ConfigSnapshot,
) ([]*envoy_cluster_v3.Cluster, error) {
if chain == nil {
return nil, fmt.Errorf("cannot create upstream cluster without discovery chain for %s", uid)
}
configMap := make(map[string]interface{})
if upstream != nil {
configMap = upstream.Config
}
cfg, err := structs.ParseUpstreamConfigNoDefaults(configMap)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse", "upstream", uid,
"error", err)
}
var escapeHatchCluster *envoy_cluster_v3.Cluster
if cfg.EnvoyClusterJSON != "" {
if chain.IsDefault() {
// If you haven't done anything to setup the discovery chain, then
// you can use the envoy_cluster_json escape hatch.
escapeHatchCluster, err = makeClusterFromUserConfig(cfg.EnvoyClusterJSON)
if err != nil {
return nil, err
}
} else {
s.Logger.Warn("ignoring escape hatch setting, because a discovery chain is configured for",
"discovery chain", chain.ServiceName, "upstream", uid,
"envoy_cluster_json", chain.ServiceName)
}
}
var out []*envoy_cluster_v3.Cluster
for _, node := range chain.Nodes {
if node.Type != structs.DiscoveryGraphNodeTypeResolver {
continue
}
failover := node.Resolver.Failover
targetID := node.Resolver.Target
target := chain.Targets[targetID]
// Determine if we have to generate the entire cluster differently.
failoverThroughMeshGateway := chain.WillFailoverThroughMeshGateway(node)
sni := target.SNI
clusterName := CustomizeClusterName(target.Name, chain)
targetSpiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: target.Namespace,
Partition: target.Partition,
Datacenter: target.Datacenter,
Service: target.Service,
}
if failoverThroughMeshGateway {
actualTargetID := firstHealthyTarget(
chain.Targets,
chainEndpoints,
targetID,
failover.Targets,
)
if actualTargetID != targetID {
actualTarget := chain.Targets[actualTargetID]
sni = actualTarget.SNI
}
}
spiffeIDs := []connect.SpiffeIDService{targetSpiffeID}
seenIDs := map[string]struct{}{
targetSpiffeID.URI().String(): {},
}
if failover != nil {
// When failovers are present we need to add them as valid SANs to validate against.
// Envoy makes the failover decision independently based on the endpoint health it has available.
for _, tid := range failover.Targets {
target, ok := chain.Targets[tid]
if !ok {
continue
}
id := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Namespace: target.Namespace,
Partition: target.Partition,
Datacenter: target.Datacenter,
Service: target.Service,
}
// Failover targets might be subsets of the same service, so these are deduplicated.
if _, ok := seenIDs[id.URI().String()]; ok {
continue
}
seenIDs[id.URI().String()] = struct{}{}
spiffeIDs = append(spiffeIDs, id)
}
}
sort.Slice(spiffeIDs, func(i, j int) bool {
return spiffeIDs[i].URI().String() < spiffeIDs[j].URI().String()
})
s.Logger.Debug("generating cluster for", "cluster", clusterName)
c := &envoy_cluster_v3.Cluster{
Name: clusterName,
AltStatName: clusterName,
ConnectTimeout: ptypes.DurationProto(node.Resolver.ConnectTimeout),
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{
HealthyPanicThreshold: &envoy_type_v3.Percent{
Value: 0, // disable panic threshold
},
},
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
},
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
},
OutlierDetection: ToOutlierDetection(cfg.PassiveHealthCheck),
}
var lb *structs.LoadBalancer
if node.LoadBalancer != nil {
lb = node.LoadBalancer
}
if err := injectLBToCluster(lb, c); err != nil {
return nil, fmt.Errorf("failed to apply load balancer configuration to cluster %q: %v", clusterName, err)
}
proto := cfg.Protocol
if proto == "" {
proto = chain.Protocol
}
if proto == "" {
proto = "tcp"
}
if proto == "http2" || proto == "grpc" {
c.Http2ProtocolOptions = &envoy_core_v3.Http2ProtocolOptions{}
}
commonTLSContext := makeCommonTLSContextFromLeafWithoutParams(cfgSnap, cfgSnap.Leaf())
err = injectSANMatcher(commonTLSContext, spiffeIDs...)
if err != nil {
return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err)
}
tlsContext := &envoy_tls_v3.UpstreamTlsContext{
CommonTlsContext: commonTLSContext,
Sni: sni,
}
transportSocket, err := makeUpstreamTLSTransportSocket(tlsContext)
if err != nil {
return nil, err
}
c.TransportSocket = transportSocket
out = append(out, c)
}
if escapeHatchCluster != nil {
if len(out) != 1 {
return nil, fmt.Errorf("cannot inject escape hatch cluster when discovery chain had no nodes")
}
defaultCluster := out[0]
// Overlay what the user provided.
escapeHatchCluster.TransportSocket = defaultCluster.TransportSocket
out = []*envoy_cluster_v3.Cluster{escapeHatchCluster}
}
return out, nil
}
// injectSANMatcher updates a TLS context so that it verifies the upstream SAN.
func injectSANMatcher(tlsContext *envoy_tls_v3.CommonTlsContext, spiffeIDs ...connect.SpiffeIDService) error {
validationCtx, ok := tlsContext.ValidationContextType.(*envoy_tls_v3.CommonTlsContext_ValidationContext)
if !ok {
return fmt.Errorf("invalid type: expected CommonTlsContext_ValidationContext, got %T",
tlsContext.ValidationContextType)
}
var matchers []*envoy_matcher_v3.StringMatcher
for _, id := range spiffeIDs {
matchers = append(matchers, &envoy_matcher_v3.StringMatcher{
MatchPattern: &envoy_matcher_v3.StringMatcher_Exact{
Exact: id.URI().String(),
},
})
}
validationCtx.ValidationContext.MatchSubjectAltNames = matchers
return nil
}
// makeClusterFromUserConfig returns the listener config decoded from an
// arbitrary proto3 json format string or an error if it's invalid.
//
// For now we only support embedding in JSON strings because of the hcl parsing
// pain (see Background section in the comment for decode.HookWeakDecodeFromSlice).
// This may be fixed in decode.HookWeakDecodeFromSlice in the future.
//
// When we do that we can support just nesting the config directly into the
// JSON/hcl naturally but this is a stop-gap that gets us an escape hatch
// immediately. It's also probably not a bad thing to support long-term since
// any config generated by other systems will likely be in canonical protobuf
// from rather than our slight variant in JSON/hcl.
func makeClusterFromUserConfig(configJSON string) (*envoy_cluster_v3.Cluster, error) {
// Type field is present so decode it as a types.Any
var any any.Any
err := jsonpb.UnmarshalString(configJSON, &any)
if err != nil {
return nil, err
}
// And then unmarshal the listener again...
var c envoy_cluster_v3.Cluster
err = proto.Unmarshal(any.Value, &c)
if err != nil {
return nil, err
}
return &c, err
}
type gatewayClusterOpts struct {
// name for the cluster
name string
// isRemote determines whether the cluster is in a remote DC and we should prefer a WAN address
isRemote bool
// onlyPassing determines whether endpoints that do not have a passing status should be considered unhealthy
onlyPassing bool
// connectTimeout is the timeout for new network connections to hosts in the cluster
connectTimeout time.Duration
// hostnameEndpoints is a list of endpoints with a hostname as their address
hostnameEndpoints structs.CheckServiceNodes
}
// makeGatewayCluster creates an Envoy cluster for a mesh or terminating gateway
func (s *ResourceGenerator) makeGatewayCluster(snap *proxycfg.ConfigSnapshot, opts gatewayClusterOpts) *envoy_cluster_v3.Cluster {
cfg, err := ParseGatewayConfig(snap.Proxy.Config)
if err != nil {
// Don't hard fail on a config typo, just warn. The parse func returns
// default config if there is an error so it's safe to continue.
s.Logger.Warn("failed to parse gateway config", "error", err)
}
if opts.connectTimeout <= 0 {
opts.connectTimeout = time.Duration(cfg.ConnectTimeoutMs) * time.Millisecond
}
cluster := &envoy_cluster_v3.Cluster{
Name: opts.name,
ConnectTimeout: ptypes.DurationProto(opts.connectTimeout),
// Having an empty config enables outlier detection with default config.
OutlierDetection: &envoy_cluster_v3.OutlierDetection{},
}
useEDS := true
if len(opts.hostnameEndpoints) > 0 {
useEDS = false
}
// If none of the service instances are addressed by a hostname we provide the endpoint IP addresses via EDS
if useEDS {
cluster.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}
cluster.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{
EdsConfig: &envoy_core_v3.ConfigSource{
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
Ads: &envoy_core_v3.AggregatedConfigSource{},
},
},
}
return cluster
}
// When a service instance is addressed by a hostname we have Envoy do the DNS resolution
// by setting a DNS cluster type and passing the hostname endpoints via CDS.
rate := 10 * time.Second
cluster.DnsRefreshRate = ptypes.DurationProto(rate)
cluster.DnsLookupFamily = envoy_cluster_v3.Cluster_V4_ONLY
discoveryType := envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_LOGICAL_DNS}
if cfg.DNSDiscoveryType == "strict_dns" {
discoveryType.Type = envoy_cluster_v3.Cluster_STRICT_DNS
}
cluster.ClusterDiscoveryType = &discoveryType
endpoints := make([]*envoy_endpoint_v3.LbEndpoint, 0, 1)
uniqueHostnames := make(map[string]bool)
var (
hostname string
idx int
fallback *envoy_endpoint_v3.LbEndpoint
)
for i, e := range opts.hostnameEndpoints {
_, addr, port := e.BestAddress(opts.isRemote)
uniqueHostnames[addr] = true
health, weight := calculateEndpointHealthAndWeight(e, opts.onlyPassing)
if health == envoy_core_v3.HealthStatus_UNHEALTHY {
fallback = makeLbEndpoint(addr, port, health, weight)
continue
}
if len(endpoints) == 0 {
endpoints = append(endpoints, makeLbEndpoint(addr, port, health, weight))
hostname = addr
idx = i
break
}
}
dc := opts.hostnameEndpoints[idx].Node.Datacenter
service := opts.hostnameEndpoints[idx].Service.CompoundServiceName()
// Fall back to last unhealthy endpoint if none were healthy
if len(endpoints) == 0 {
s.Logger.Warn("upstream service does not contain any healthy instances",
"dc", dc, "service", service.String())
endpoints = append(endpoints, fallback)
}
if len(uniqueHostnames) > 1 {
s.Logger.Warn(fmt.Sprintf("service contains instances with more than one unique hostname; only %q be resolved by Envoy", hostname),
"dc", dc, "service", service.String())
}
cluster.LoadAssignment = &envoy_endpoint_v3.ClusterLoadAssignment{
ClusterName: cluster.Name,
Endpoints: []*envoy_endpoint_v3.LocalityLbEndpoints{
{
LbEndpoints: endpoints,
},
},
}
return cluster
}
func makeThresholdsIfNeeded(limits *structs.UpstreamLimits) []*envoy_cluster_v3.CircuitBreakers_Thresholds {
if limits == nil {
return nil
}
threshold := &envoy_cluster_v3.CircuitBreakers_Thresholds{}
// Likewise, make sure to not set any threshold values on the zero-value in
// order to rely on Envoy defaults
if limits.MaxConnections != nil {
threshold.MaxConnections = makeUint32Value(*limits.MaxConnections)
}
if limits.MaxPendingRequests != nil {
threshold.MaxPendingRequests = makeUint32Value(*limits.MaxPendingRequests)
}
if limits.MaxConcurrentRequests != nil {
threshold.MaxRequests = makeUint32Value(*limits.MaxConcurrentRequests)
}
return []*envoy_cluster_v3.CircuitBreakers_Thresholds{threshold}
}
func makeLbEndpoint(addr string, port int, health envoy_core_v3.HealthStatus, weight int) *envoy_endpoint_v3.LbEndpoint {
return &envoy_endpoint_v3.LbEndpoint{
HostIdentifier: &envoy_endpoint_v3.LbEndpoint_Endpoint{
Endpoint: &envoy_endpoint_v3.Endpoint{
Address: &envoy_core_v3.Address{
Address: &envoy_core_v3.Address_SocketAddress{
SocketAddress: &envoy_core_v3.SocketAddress{
Address: addr,
PortSpecifier: &envoy_core_v3.SocketAddress_PortValue{
PortValue: uint32(port),
},
},
},
},
},
},
HealthStatus: health,
LoadBalancingWeight: makeUint32Value(weight),
}
}
func injectLBToCluster(ec *structs.LoadBalancer, c *envoy_cluster_v3.Cluster) error {
if ec == nil {
return nil
}
switch ec.Policy {
case "":
return nil
case structs.LBPolicyLeastRequest:
c.LbPolicy = envoy_cluster_v3.Cluster_LEAST_REQUEST
if ec.LeastRequestConfig != nil {
c.LbConfig = &envoy_cluster_v3.Cluster_LeastRequestLbConfig_{
LeastRequestLbConfig: &envoy_cluster_v3.Cluster_LeastRequestLbConfig{
ChoiceCount: &wrappers.UInt32Value{Value: ec.LeastRequestConfig.ChoiceCount},
},
}
}
case structs.LBPolicyRoundRobin:
c.LbPolicy = envoy_cluster_v3.Cluster_ROUND_ROBIN
case structs.LBPolicyRandom:
c.LbPolicy = envoy_cluster_v3.Cluster_RANDOM
case structs.LBPolicyRingHash:
c.LbPolicy = envoy_cluster_v3.Cluster_RING_HASH
if ec.RingHashConfig != nil {
c.LbConfig = &envoy_cluster_v3.Cluster_RingHashLbConfig_{
RingHashLbConfig: &envoy_cluster_v3.Cluster_RingHashLbConfig{
MinimumRingSize: &wrappers.UInt64Value{Value: ec.RingHashConfig.MinimumRingSize},
MaximumRingSize: &wrappers.UInt64Value{Value: ec.RingHashConfig.MaximumRingSize},
},
}
}
case structs.LBPolicyMaglev:
c.LbPolicy = envoy_cluster_v3.Cluster_MAGLEV
default:
return fmt.Errorf("unsupported load balancer policy %q for cluster %q", ec.Policy, c.Name)
}
return nil
}