diff --git a/agent/agent.go b/agent/agent.go index e5101ab88..38ca5e4c3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -42,6 +42,7 @@ import ( "github.com/hashicorp/consul/agent/consul/servercert" "github.com/hashicorp/consul/agent/dns" external "github.com/hashicorp/consul/agent/grpc-external" + grpcDNS "github.com/hashicorp/consul/agent/grpc-external/services/dns" "github.com/hashicorp/consul/agent/hcp/scada" libscada "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/consul/agent/local" @@ -920,6 +921,15 @@ func (a *Agent) listenAndServeDNS() error { } }(addr) } + s, _ := NewDNSServer(a) + + grpcDNS.NewServer(grpcDNS.Config{ + Logger: a.logger.Named("grpc-api.dns"), + DNSServeMux: s.mux, + LocalAddress: grpcDNS.Local{IP: net.IPv4(127, 0, 0, 1), Port: a.config.GRPCPort}, + }).Register(a.externalGRPCServer) + + a.dnsServers = append(a.dnsServers, s) // wait for servers to be up timeout := time.After(time.Second) diff --git a/agent/dns.go b/agent/dns.go index b627f1f4a..d458928e8 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -5,16 +5,16 @@ import ( "encoding/hex" "errors" "fmt" + "math" "net" "regexp" "strings" "sync/atomic" "time" + "github.com/armon/go-metrics" "github.com/armon/go-metrics/prometheus" - - metrics "github.com/armon/go-metrics" - radix "github.com/armon/go-radix" + "github.com/armon/go-radix" "github.com/coredns/coredns/plugin/pkg/dnsutil" "github.com/hashicorp/go-hclog" "github.com/miekg/dns" @@ -61,6 +61,13 @@ const ( staleCounterThreshold = 5 * time.Second defaultMaxUDPSize = 512 + + // If a consumer sets a buffer size greater than this amount we will default it down + // to this amount to ensure that consul does respond. Previously if consumer had a larger buffer + // size than 65535 - 60 bytes (maximim 60 bytes for IP header. UDP header will be offset in the + // trimUDP call) consul would fail to respond and the consumer timesout + // the request. + maxUDPDatagramSize = math.MaxUint16 - 68 ) type dnsSOAConfig struct { @@ -139,13 +146,13 @@ func NewDNSServer(a *Agent) (*DNSServer, error) { // Make sure domains are FQDN, make them case insensitive for ServeMux domain := dns.Fqdn(strings.ToLower(a.config.DNSDomain)) altDomain := dns.Fqdn(strings.ToLower(a.config.DNSAltDomain)) - srv := &DNSServer{ agent: a, domain: domain, altDomain: altDomain, logger: a.logger.Named(logging.DNS), defaultEnterpriseMeta: *a.AgentEnterpriseMeta(), + mux: dns.NewServeMux(), } cfg, err := GetDNSConfig(a.config) if err != nil { @@ -153,6 +160,19 @@ func NewDNSServer(a *Agent) (*DNSServer, error) { } srv.config.Store(cfg) + srv.mux.HandleFunc("arpa.", srv.handlePtr) + srv.mux.HandleFunc(srv.domain, srv.handleQuery) + // this is not an empty string check because NewDNSServer will have + // converted the configured alt domain into an FQDN which will ensure that + // the value ends with a ".". Therefore "." is the empty string equivalent + // for originally having no alternate domain set. If there is a reason + // why consul should be configured to handle the root zone I have yet + // to think of it. + if srv.altDomain != "." { + srv.mux.HandleFunc(srv.altDomain, srv.handleQuery) + } + srv.toggleRecursorHandlerFromConfig(cfg) + return srv, nil } @@ -227,22 +247,6 @@ func (cfg *dnsConfig) GetTTLForService(service string) (time.Duration, bool) { } func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error { - cfg := d.config.Load().(*dnsConfig) - - d.mux = dns.NewServeMux() - d.mux.HandleFunc("arpa.", d.handlePtr) - d.mux.HandleFunc(d.domain, d.handleQuery) - // this is not an empty string check because NewDNSServer will have - // converted the configured alt domain into an FQDN which will ensure that - // the value ends with a ".". Therefore "." is the empty string equivalent - // for originally having no alternate domain set. If there is a reason - // why consul should be configured to handle the root zone I have yet - // to think of it. - if d.altDomain != "." { - d.mux.HandleFunc(d.altDomain, d.handleQuery) - } - d.toggleRecursorHandlerFromConfig(cfg) - d.Server = &dns.Server{ Addr: addr, Net: network, @@ -1258,6 +1262,11 @@ func trimUDPResponse(req, resp *dns.Msg, udpAnswerLimit int) (trimmed bool) { maxSize = int(size) } } + // Overriding maxSize as the maxSize cannot be larger than the + // maxUDPDatagram size. Reliability guarantees disappear > than this amount. + if maxSize > maxUDPDatagramSize { + maxSize = maxUDPDatagramSize + } // We avoid some function calls and allocations by only handling the // extra data when necessary. @@ -1286,8 +1295,9 @@ func trimUDPResponse(req, resp *dns.Msg, udpAnswerLimit int) (trimmed bool) { // will allow our responses to be compliant even if some downstream server // uncompresses them. // Even when size is too big for one single record, try to send it anyway - // (useful for 512 bytes messages) - for len(resp.Answer) > 1 && resp.Len() > maxSize-7 { + // (useful for 512 bytes messages). 8 is removed from maxSize to ensure that we account + // for the udp header (8 bytes). + for len(resp.Answer) > 1 && resp.Len() > maxSize-8 { // first try to remove the NS section may be it will truncate enough if len(resp.Ns) != 0 { resp.Ns = []dns.RR{} diff --git a/agent/dns_test.go b/agent/dns_test.go index 9f876eaeb..2f2499a2e 100644 --- a/agent/dns_test.go +++ b/agent/dns_test.go @@ -3,6 +3,7 @@ package agent import ( "errors" "fmt" + "math" "math/rand" "net" "reflect" @@ -7563,6 +7564,55 @@ func TestDNS_trimUDPResponse_TrimSizeEDNS(t *testing.T) { } } +func TestDNS_trimUDPResponse_TrimSizeMaxSize(t *testing.T) { + t.Parallel() + cfg := loadRuntimeConfig(t, `node_name = "test" data_dir = "a" bind_addr = "127.0.0.1" node_name = "dummy"`) + + resp := &dns.Msg{} + + for i := 0; i < 600; i++ { + target := fmt.Sprintf("ip-10-0-1-%d.node.dc1.consul.", 150+i) + srv := &dns.SRV{ + Hdr: dns.RR_Header{ + Name: "redis-cache-redis.service.consul.", + Rrtype: dns.TypeSRV, + Class: dns.ClassINET, + }, + Target: target, + } + a := &dns.A{ + Hdr: dns.RR_Header{ + Name: target, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + }, + A: net.ParseIP(fmt.Sprintf("10.0.1.%d", 150+i)), + } + + resp.Answer = append(resp.Answer, srv) + resp.Extra = append(resp.Extra, a) + } + + reqEDNS, respEDNS := &dns.Msg{}, &dns.Msg{} + reqEDNS.SetEdns0(math.MaxUint16, true) + respEDNS.Answer = append(respEDNS.Answer, resp.Answer...) + respEDNS.Extra = append(respEDNS.Extra, resp.Extra...) + require.Greater(t, respEDNS.Len(), math.MaxUint16) + t.Logf("length is: %v", respEDNS.Len()) + + if trimmed := trimUDPResponse(reqEDNS, respEDNS, cfg.DNSUDPAnswerLimit); !trimmed { + t.Errorf("expected edns to be trimmed: %#v", resp) + } + require.Greater(t, math.MaxUint16, respEDNS.Len()) + + t.Logf("length is: %v", respEDNS.Len()) + + if len(respEDNS.Answer) == 0 || len(respEDNS.Answer) != len(respEDNS.Extra) { + t.Errorf("bad edns answer length: %#v", resp) + } + +} + func TestDNS_syncExtra(t *testing.T) { t.Parallel() resp := &dns.Msg{