From a7de87e95b74c33fae0e0cc6bbf71c3e4f64e00a Mon Sep 17 00:00:00 2001 From: freddygv Date: Wed, 30 Jun 2021 10:16:33 -0600 Subject: [PATCH] Validate SANs for passthrough clusters and failovers --- agent/proxycfg/snapshot.go | 4 ++ agent/proxycfg/state_test.go | 6 +++ agent/proxycfg/upstreams.go | 11 +++- agent/xds/clusters.go | 70 ++++++++++++++++++++------ agent/xds/clusters_test.go | 12 +++++ agent/xds/xds_protocol_helpers_test.go | 23 ++++++--- 6 files changed, 103 insertions(+), 23 deletions(-) diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 991ef38fd..87e7326e8 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -3,6 +3,7 @@ package proxycfg import ( "context" "fmt" + "github.com/hashicorp/consul/agent/connect" "sort" "github.com/mitchellh/copystructure" @@ -57,6 +58,9 @@ type ServicePassthroughAddrs struct { // SNI is the Service SNI of the upstream. SNI string + // SpiffeID is the SPIFFE ID to use for upstream SAN validation. + SpiffeID connect.SpiffeIDService + // Addrs is a set of the best LAN addresses for the instances of the upstream. Addrs map[string]struct{} } diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go index e83b1f4d1..9cc21b1ab 100644 --- a/agent/proxycfg/state_test.go +++ b/agent/proxycfg/state_test.go @@ -1868,6 +1868,12 @@ func TestState_WatchesAndUpdates(t *testing.T) { require.Equal(t, snap.ConnectProxy.PassthroughUpstreams, map[string]ServicePassthroughAddrs{ db.String(): { SNI: connect.ServiceSNI("db", "", structs.IntentionDefaultNamespace, snap.Datacenter, snap.Roots.TrustDomain), + SpiffeID: connect.SpiffeIDService{ + Host: snap.Roots.TrustDomain, + Namespace: structs.IntentionDefaultNamespace, + Datacenter: snap.Datacenter, + Service: "db", + }, Addrs: map[string]struct{}{ "10.10.10.10": {}, "10.0.0.2": {}, diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go index a4ac6625d..d3557e527 100644 --- a/agent/proxycfg/upstreams.go +++ b/agent/proxycfg/upstreams.go @@ -94,9 +94,18 @@ func (s *handlerUpstreams) handleUpdateUpstreams(ctx context.Context, u cache.Up snap.Datacenter, snap.Roots.TrustDomain) + spiffeID := connect.SpiffeIDService{ + Host: snap.Roots.TrustDomain, + Partition: "", + Namespace: svc.NamespaceOrDefault(), + Datacenter: snap.Datacenter, + Service: svc.Name, + } + if _, ok := upstreamsSnapshot.PassthroughUpstreams[svc.String()]; !ok { upstreamsSnapshot.PassthroughUpstreams[svc.String()] = ServicePassthroughAddrs{ - SNI: sni, + SNI: sni, + SpiffeID: spiffeID, // Stored in a set because it's possible for these to be duplicated // when the upstream-target is targeted by multiple discovery chains. diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index 26209d9cf..fbd428d70 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -3,6 +3,7 @@ package xds import ( "errors" "fmt" + "sort" "time" envoy_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" @@ -206,8 +207,13 @@ func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, ConnectTimeout: ptypes.DurationProto(5 * time.Second), } + commonTLSContext := makeCommonTLSContextFromLeaf(cfgSnap, cfgSnap.Leaf()) + err := injectSANMatcher(commonTLSContext, passthrough.SpiffeID) + if err != nil { + return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", passthrough.SNI, err) + } tlsContext := envoy_tls_v3.UpstreamTlsContext{ - CommonTlsContext: makeCommonTLSContextFromLeaf(cfgSnap, cfgSnap.Leaf()), + CommonTlsContext: commonTLSContext, Sni: passthrough.SNI, } transportSocket, err := makeUpstreamTLSTransportSocket(&tlsContext) @@ -538,7 +544,7 @@ func (s *ResourceGenerator) makeUpstreamClusterForPreparedQuery(upstream structs // Enable TLS upstream with the configured client certificate. commonTLSContext := makeCommonTLSContextFromLeaf(cfgSnap, cfgSnap.Leaf()) - err = injectSANMatcher(commonTLSContext, spiffeID.URI().String()) + err = injectSANMatcher(commonTLSContext, spiffeID) if err != nil { return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err) } @@ -612,7 +618,7 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain( sni := target.SNI clusterName := CustomizeClusterName(target.Name, chain) - spiffeID := connect.SpiffeIDService{ + targetSpiffeID := connect.SpiffeIDService{ Host: cfgSnap.Roots.TrustDomain, Namespace: target.Namespace, Datacenter: target.Datacenter, @@ -630,16 +636,43 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain( if actualTargetID != targetID { actualTarget := chain.Targets[actualTargetID] sni = actualTarget.SNI - - spiffeID = connect.SpiffeIDService{ - Host: cfgSnap.Roots.TrustDomain, - Namespace: actualTarget.Namespace, - Datacenter: actualTarget.Datacenter, - Service: actualTarget.Service, - } } } + spiffeIDs := []connect.SpiffeIDService{targetSpiffeID} + seenIDs := map[string]struct{}{ + targetSpiffeID.URI().String(): {}, + } + + if failover != nil { + // When failovers are present we need to add them as valid SANs to validate against. + // Envoy makes the failover decision independently based on the endpoint health it has available. + for _, tid := range failover.Targets { + target, ok := chain.Targets[tid] + if !ok { + continue + } + + id := connect.SpiffeIDService{ + Host: cfgSnap.Roots.TrustDomain, + Namespace: target.Namespace, + Datacenter: target.Datacenter, + Service: target.Service, + } + + // Failover targets might be subsets of the same service, so these are deduplicated. + if _, ok := seenIDs[id.URI().String()]; ok { + continue + } + seenIDs[id.URI().String()] = struct{}{} + + spiffeIDs = append(spiffeIDs, id) + } + } + sort.Slice(spiffeIDs, func(i, j int) bool { + return spiffeIDs[i].URI().String() < spiffeIDs[j].URI().String() + }) + s.Logger.Debug("generating cluster for", "cluster", clusterName) c := &envoy_cluster_v3.Cluster{ Name: clusterName, @@ -687,7 +720,7 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain( } commonTLSContext := makeCommonTLSContextFromLeaf(cfgSnap, cfgSnap.Leaf()) - err = injectSANMatcher(commonTLSContext, spiffeID.URI().String()) + err = injectSANMatcher(commonTLSContext, spiffeIDs...) if err != nil { return nil, fmt.Errorf("failed to inject SAN matcher rules for cluster %q: %v", sni, err) } @@ -722,18 +755,23 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain( } // injectSANMatcher updates a TLS context so that it verifies the upstream SAN. -func injectSANMatcher(tlsContext *envoy_tls_v3.CommonTlsContext, uri string) error { +func injectSANMatcher(tlsContext *envoy_tls_v3.CommonTlsContext, spiffeIDs ...connect.SpiffeIDService) error { validationCtx, ok := tlsContext.ValidationContextType.(*envoy_tls_v3.CommonTlsContext_ValidationContext) if !ok { return fmt.Errorf("invalid type: expected CommonTlsContext_ValidationContext, got %T", tlsContext.ValidationContextType) } - validationCtx.ValidationContext.MatchSubjectAltNames = []*envoy_matcher_v3.StringMatcher{ - { - MatchPattern: &envoy_matcher_v3.StringMatcher_Exact{Exact: uri}, - }, + var matchers []*envoy_matcher_v3.StringMatcher + for _, id := range spiffeIDs { + matchers = append(matchers, &envoy_matcher_v3.StringMatcher{ + MatchPattern: &envoy_matcher_v3.StringMatcher_Exact{ + Exact: id.URI().String(), + }, + }) } + validationCtx.ValidationContext.MatchSubjectAltNames = matchers + return nil } diff --git a/agent/xds/clusters_test.go b/agent/xds/clusters_test.go index 443a796da..cc3685399 100644 --- a/agent/xds/clusters_test.go +++ b/agent/xds/clusters_test.go @@ -671,12 +671,24 @@ func TestClustersFromSnapshot(t *testing.T) { snap.ConnectProxy.PassthroughUpstreams = map[string]proxycfg.ServicePassthroughAddrs{ "default/kafka": { SNI: "kafka.default.dc1.internal.e5b08d03-bfc3-c870-1833-baddb116e648.consul", + SpiffeID: connect.SpiffeIDService{ + Host: "e5b08d03-bfc3-c870-1833-baddb116e648.consul", + Namespace: "default", + Datacenter: "dc1", + Service: "kafka", + }, Addrs: map[string]struct{}{ "9.9.9.9": {}, }, }, "default/mongo": { SNI: "mongo.default.dc1.internal.e5b08d03-bfc3-c870-1833-baddb116e648.consul", + SpiffeID: connect.SpiffeIDService{ + Host: "e5b08d03-bfc3-c870-1833-baddb116e648.consul", + Namespace: "default", + Datacenter: "dc1", + Service: "mongo", + }, Addrs: map[string]struct{}{ "10.10.10.10": {}, "10.10.10.12": {}, diff --git a/agent/xds/xds_protocol_helpers_test.go b/agent/xds/xds_protocol_helpers_test.go index 734f3e36b..cdf3b257b 100644 --- a/agent/xds/xds_protocol_helpers_test.go +++ b/agent/xds/xds_protocol_helpers_test.go @@ -1,6 +1,7 @@ package xds import ( + "github.com/hashicorp/consul/agent/connect" "sort" "sync" "testing" @@ -242,14 +243,14 @@ func xdsNewPublicTransportSocket( t *testing.T, snap *proxycfg.ConfigSnapshot, ) *envoy_core_v3.TransportSocket { - return xdsNewTransportSocket(t, snap, true, true, "", "") + return xdsNewTransportSocket(t, snap, true, true, "", connect.SpiffeIDService{}) } func xdsNewUpstreamTransportSocket( t *testing.T, snap *proxycfg.ConfigSnapshot, sni string, - uri string, + uri connect.SpiffeIDService, ) *envoy_core_v3.TransportSocket { return xdsNewTransportSocket(t, snap, false, false, sni, uri) } @@ -260,7 +261,7 @@ func xdsNewTransportSocket( downstream bool, requireClientCert bool, sni string, - uri string, + uri connect.SpiffeIDService, ) *envoy_core_v3.TransportSocket { // Assume just one root for now, can get fancier later if needed. caPEM := snap.Roots.Roots[0].RootCert @@ -277,7 +278,7 @@ func xdsNewTransportSocket( }, }, } - if uri != "" { + if uri.Service != "" { require.NoError(t, injectSANMatcher(commonTLSContext, uri)) } @@ -363,10 +364,20 @@ func makeTestResource(t *testing.T, raw interface{}) *envoy_discovery_v3.Resourc func makeTestCluster(t *testing.T, snap *proxycfg.ConfigSnapshot, fixtureName string) *envoy_cluster_v3.Cluster { var ( dbSNI = "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul" - dbURI = "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/db" + dbURI = connect.SpiffeIDService{ + Host: "11111111-2222-3333-4444-555555555555.consul", + Namespace: "default", + Datacenter: "dc1", + Service: "db", + } geocacheSNI = "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul" - geocacheURI = "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/geo-cache" + geocacheURI = connect.SpiffeIDService{ + Host: "11111111-2222-3333-4444-555555555555.consul", + Namespace: "default", + Datacenter: "dc1", + Service: "geo-cache", + } ) switch fixtureName {