From ed9808c4f1398d61943b4d16fc98cecb36ebd903 Mon Sep 17 00:00:00 2001 From: Freddy Date: Wed, 6 Jul 2022 10:30:04 -0600 Subject: [PATCH] Parse peer name for virtual IP DNS queries (#13602) This commit updates the DNS query locality parsing so that the virtual IP for an imported service can be queried. Note that: - Support for parsing a peer in other service discovery queries was not added. - Querying another datacenter for a virtual IP is not supported. This was technically allowed in 1.11 but is being rolled back for 1.13 because it is not a use-case we intended to support. Virtual IPs in different datacenters are going to collide because they are allocated sequentially. --- agent/dns.go | 81 +++++++++++++++------- agent/dns_oss.go | 14 ++-- agent/dns_test.go | 94 +++++++++++++++++++++++--- website/content/docs/discovery/dns.mdx | 4 +- 4 files changed, 152 insertions(+), 41 deletions(-) diff --git a/agent/dns.go b/agent/dns.go index 9d0d3b9a5..b627f1f4a 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -676,15 +676,34 @@ func (e ecsNotGlobalError) Unwrap() error { return e.error } +type queryLocality struct { + // datacenter is the datacenter parsed from a label that has an explicit datacenter part. + // Example query: .virtual..ns..ap..dc.consul + datacenter string + + // peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are specified in the same query part. + // Example query: .virtual..consul + peerOrDatacenter string + + acl.EnterpriseMeta +} + +func (l queryLocality) effectiveDatacenter(defaultDC string) string { + // Prefer the value parsed from a query with explicit parts: .ns..ap..dc + if l.datacenter != "" { + return l.datacenter + } + // Fall back to the ambiguously parsed DC or Peer. + if l.peerOrDatacenter != "" { + return l.peerOrDatacenter + } + // If all are empty, use a default value. + return defaultDC +} + // dispatch is used to parse a request and invoke the correct handler. // parameter maxRecursionLevel will handle whether recursive call can be performed func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursionLevel int) error { - // By default the query is in the default datacenter - datacenter := d.agent.config.Datacenter - - // have to deref to clone it so we don't modify (start from the agent's defaults) - var entMeta = d.defaultEnterpriseMeta - // Choose correct response domain respDomain := d.getResponseDomain(req.Question[0].Name) @@ -733,16 +752,17 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi return invalid() } - if !d.parseDatacenterAndEnterpriseMeta(querySuffixes, cfg, &datacenter, &entMeta) { + locality, ok := d.parseLocality(querySuffixes, cfg) + if !ok { return invalid() } lookup := serviceLookup{ - Datacenter: datacenter, + Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter), Connect: false, Ingress: false, MaxRecursionLevel: maxRecursionLevel, - EnterpriseMeta: entMeta, + EnterpriseMeta: locality.EnterpriseMeta, } // Support RFC 2782 style syntax if n == 2 && strings.HasPrefix(queryParts[1], "_") && strings.HasPrefix(queryParts[0], "_") { @@ -779,17 +799,18 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi return invalid() } - if !d.parseDatacenterAndEnterpriseMeta(querySuffixes, cfg, &datacenter, &entMeta) { + locality, ok := d.parseLocality(querySuffixes, cfg) + if !ok { return invalid() } lookup := serviceLookup{ - Datacenter: datacenter, + Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter), Service: queryParts[len(queryParts)-1], Connect: true, Ingress: false, MaxRecursionLevel: maxRecursionLevel, - EnterpriseMeta: entMeta, + EnterpriseMeta: locality.EnterpriseMeta, } // name.connect.consul return d.serviceLookup(cfg, lookup, req, resp) @@ -799,14 +820,18 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi return invalid() } - if !d.parseDatacenterAndEnterpriseMeta(querySuffixes, cfg, &datacenter, &entMeta) { + locality, ok := d.parseLocality(querySuffixes, cfg) + if !ok { return invalid() } args := structs.ServiceSpecificRequest{ - Datacenter: datacenter, + // The datacenter of the request is not specified because cross-datacenter virtual IP + // queries are not supported. This guard rail is in place because virtual IPs are allocated + // within a DC, therefore their uniqueness is not guaranteed globally. + PeerName: locality.peerOrDatacenter, ServiceName: queryParts[len(queryParts)-1], - EnterpriseMeta: entMeta, + EnterpriseMeta: locality.EnterpriseMeta, QueryOptions: structs.QueryOptions{ Token: d.agent.tokens.UserToken(), }, @@ -834,17 +859,18 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi return invalid() } - if !d.parseDatacenterAndEnterpriseMeta(querySuffixes, cfg, &datacenter, &entMeta) { + locality, ok := d.parseLocality(querySuffixes, cfg) + if !ok { return invalid() } lookup := serviceLookup{ - Datacenter: datacenter, + Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter), Service: queryParts[len(queryParts)-1], Connect: false, Ingress: true, MaxRecursionLevel: maxRecursionLevel, - EnterpriseMeta: entMeta, + EnterpriseMeta: locality.EnterpriseMeta, } // name.ingress.consul return d.serviceLookup(cfg, lookup, req, resp) @@ -854,13 +880,14 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi return invalid() } - if !d.parseDatacenterAndEnterpriseMeta(querySuffixes, cfg, &datacenter, &entMeta) { + locality, ok := d.parseLocality(querySuffixes, cfg) + if !ok { return invalid() } - // Namespace should not be set for node queries - ns := entMeta.NamespaceOrEmpty() - if ns != "" && ns != acl.DefaultNamespaceName { + // Nodes are only registered in the default namespace so queries + // must not specify a non-default namespace. + if !locality.InDefaultNamespace() { return invalid() } @@ -868,15 +895,17 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi node := strings.Join(queryParts, ".") lookup := nodeLookup{ - Datacenter: datacenter, + Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter), Node: node, MaxRecursionLevel: maxRecursionLevel, - EnterpriseMeta: entMeta, + EnterpriseMeta: locality.EnterpriseMeta, } return d.nodeLookup(cfg, lookup, req, resp) case "query": + datacenter := d.agent.config.Datacenter + // ensure we have a query name if len(queryParts) < 1 { return invalid() @@ -905,7 +934,7 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi if err != nil { return invalid() } - //check if the query type is A for IPv4 or ANY + // check if the query type is A for IPv4 or ANY aRecord := &dns.A{ Hdr: dns.RR_Header{ Name: qName + respDomain, @@ -926,7 +955,7 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi if err != nil { return invalid() } - //check if the query type is AAAA for IPv6 or ANY + // check if the query type is AAAA for IPv6 or ANY aaaaRecord := &dns.AAAA{ Hdr: dns.RR_Header{ Name: qName + respDomain, diff --git a/agent/dns_oss.go b/agent/dns_oss.go index 9476e810f..de257aa4c 100644 --- a/agent/dns_oss.go +++ b/agent/dns_oss.go @@ -16,15 +16,19 @@ func getEnterpriseDNSConfig(conf *config.RuntimeConfig) enterpriseDNSConfig { return enterpriseDNSConfig{} } -func (d *DNSServer) parseDatacenterAndEnterpriseMeta(labels []string, _ *dnsConfig, datacenter *string, _ *acl.EnterpriseMeta) bool { +// parseLocality can parse peer name or datacenter from a DNS query's labels. +// Peer name is parsed from the same query part that datacenter is, so given this ambiguity +// we parse a "peerOrDatacenter". The caller or RPC handler are responsible for disambiguating. +func (d *DNSServer) parseLocality(labels []string, cfg *dnsConfig) (queryLocality, bool) { switch len(labels) { case 1: - *datacenter = labels[0] - return true + return queryLocality{peerOrDatacenter: labels[0]}, true + case 0: - return true + return queryLocality{}, true } - return false + + return queryLocality{}, false } func serviceCanonicalDNSName(name, kind, datacenter, domain string, _ *acl.EnterpriseMeta) string { diff --git a/agent/dns_test.go b/agent/dns_test.go index 6d4085833..f0d82d2e7 100644 --- a/agent/dns_test.go +++ b/agent/dns_test.go @@ -11,6 +11,7 @@ import ( "testing" "time" + "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/serf/coordinate" "github.com/miekg/dns" "github.com/stretchr/testify/require" @@ -458,7 +459,7 @@ func TestDNSCycleRecursorCheck(t *testing.T) { }, }) defer server2.Shutdown() - //Mock the agent startup with the necessary configs + // Mock the agent startup with the necessary configs agent := NewTestAgent(t, `recursors = ["`+server1.Addr+`", "`+server2.Addr+`"] `) @@ -496,7 +497,7 @@ func TestDNSCycleRecursorCheckAllFail(t *testing.T) { MsgHdr: dns.MsgHdr{Rcode: dns.RcodeRefused}, }) defer server3.Shutdown() - //Mock the agent startup with the necessary configs + // Mock the agent startup with the necessary configs agent := NewTestAgent(t, `recursors = ["`+server1.Addr+`", "`+server2.Addr+`","`+server3.Addr+`"] `) @@ -507,7 +508,7 @@ func TestDNSCycleRecursorCheckAllFail(t *testing.T) { // Agent request client := new(dns.Client) in, _, _ := client.Exchange(m, agent.DNSAddr()) - //Verify if we hit SERVFAIL from Consul + // Verify if we hit SERVFAIL from Consul require.Equal(t, dns.RcodeServerFailure, in.Rcode) } func TestDNS_NodeLookup_CNAME(t *testing.T) { @@ -1756,14 +1757,42 @@ func TestDNS_ConnectServiceLookup(t *testing.T) { require.Equal(t, uint32(0), srvRec.Hdr.Ttl) require.Equal(t, "127.0.0.55", cnameRec.A.String()) } +} - // Look up the virtual IP of the proxy. - questions = []string{ - "db.virtual.consul.", +func TestDNS_VirtualIPLookup(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") } - for _, question := range questions { + + t.Parallel() + + a := NewTestAgent(t, "") + defer a.Shutdown() + testrpc.WaitForLeader(t, a.RPC, "dc1") + + server, ok := a.delegate.(*consul.Server) + require.True(t, ok) + + // The proxy service will not receive a virtual IP if the server is not assigning virtual IPs yet. + retry.Run(t, func(r *retry.R) { + _, entry, err := server.FSM().State().SystemMetadataGet(nil, structs.SystemMetadataVirtualIPsEnabled) + require.NoError(r, err) + require.NotNil(r, entry) + }) + + type testCase struct { + name string + reg *structs.RegisterRequest + question string + expect string + } + + run := func(t *testing.T, tc testCase) { + var out struct{} + require.Nil(t, a.RPC("Catalog.Register", tc.reg, &out)) + m := new(dns.Msg) - m.SetQuestion(question, dns.TypeA) + m.SetQuestion(tc.question, dns.TypeA) c := new(dns.Client) in, _, err := c.Exchange(m, a.DNSAddr()) @@ -1772,7 +1801,54 @@ func TestDNS_ConnectServiceLookup(t *testing.T) { aRec, ok := in.Answer[0].(*dns.A) require.True(t, ok) - require.Equal(t, "240.0.0.1", aRec.A.String()) + require.Equal(t, tc.expect, aRec.A.String()) + } + + tt := []testCase{ + { + name: "local query", + reg: &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.55", + Service: &structs.NodeService{ + Kind: structs.ServiceKindConnectProxy, + Service: "web-proxy", + Port: 12345, + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: "db", + }, + }, + }, + question: "db.virtual.consul.", + expect: "240.0.0.1", + }, + { + name: "query for imported service", + reg: &structs.RegisterRequest{ + PeerName: "frontend", + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.55", + Service: &structs.NodeService{ + PeerName: "frontend", + Kind: structs.ServiceKindConnectProxy, + Service: "web-proxy", + Port: 12345, + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: "db", + }, + }, + }, + question: "db.virtual.frontend.consul.", + expect: "240.0.0.2", + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) } } diff --git a/website/content/docs/discovery/dns.mdx b/website/content/docs/discovery/dns.mdx index 17c25e6e6..471b9f9a2 100644 --- a/website/content/docs/discovery/dns.mdx +++ b/website/content/docs/discovery/dns.mdx @@ -376,12 +376,14 @@ If you need more complex behavior, please use the To find the unique virtual IP allocated for a service: ```text -.virtual. +.virtual[.peer]. ``` This will return the unique virtual IP for any [Connect-capable](/docs/connect) service. Each Connect service has a virtual IP assigned to it by Consul - this is used by sidecar proxies for the [Transparent Proxy](/docs/connect/transparent-proxy) feature. +The peer name is an optional part of the FQDN, and it is used to query for the virtual IP +of a service imported from that peer. The virtual IP is also added to the service's [Tagged Addresses](/docs/discovery/services#tagged-addresses) under the `consul-virtual` tag.