From e3159d031800f30f6be98ec6599d639056f42b86 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 15 Jan 2014 11:30:04 -1000 Subject: [PATCH] DNS filters service nodes if they have failing checks --- command/agent/dns.go | 21 +++++++++++++++++++ command/agent/dns_test.go | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/command/agent/dns.go b/command/agent/dns.go index dba09276e..ccf970634 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -315,6 +315,9 @@ func (d *DNSServer) serviceLookup(datacenter, service, tag string, req, resp *dn return } + // Filter out any service nodes due to health checks + out = d.filterServiceNodes(out) + // Add various responses depending on the request qType := req.Question[0].Qtype if qType == dns.TypeANY || qType == dns.TypeA { @@ -325,6 +328,24 @@ func (d *DNSServer) serviceLookup(datacenter, service, tag string, req, resp *dn } } +// filterServiceNodes is used to filter out nodes that are failing +// health checks to prevent routing to unhealthy nodes +func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { + n := len(nodes) + for i := 0; i < n; i++ { + node := nodes[i] + for _, check := range node.Checks { + if check.Status == structs.HealthCritical { + d.logger.Printf("[WARN] dns: node '%s' failing health check '%s: %s', dropping from service '%s'", + node.Node.Node, check.CheckID, check.Name, node.Service.Service) + nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} + n-- + } + } + } + return nodes[:n] +} + // serviceARecords is used to add the A records for a service lookup func (d *DNSServer) serviceARecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) { handled := make(map[string]struct{}) diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index e74babad1..30c94b640 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -308,3 +308,47 @@ func TestDNS_Recurse(t *testing.T) { t.Fatalf("Bad: %#v", in) } } + +func TestDNS_ServiceLookup_FilterCritical(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + // Wait for leader + time.Sleep(100 * time.Millisecond) + + // Register node + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + Service: "db", + Tag: "master", + Port: 12345, + }, + Check: &structs.HealthCheck{ + CheckID: "serf", + Name: "serf", + Status: structs.HealthCritical, + }, + } + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + m := new(dns.Msg) + m.SetQuestion("db.service.consul.", dns.TypeANY) + + c := new(dns.Client) + in, _, err := c.Exchange(m, srv.agent.config.DNSAddr) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Should get no answer since we are failing! + if len(in.Answer) != 0 { + t.Fatalf("Bad: %#v", in) + } +}