DNS filters service nodes if they have failing checks

This commit is contained in:
Armon Dadgar 2014-01-15 11:30:04 -10:00
parent 9af24bd4c2
commit e3159d0318
2 changed files with 65 additions and 0 deletions

View File

@ -315,6 +315,9 @@ func (d *DNSServer) serviceLookup(datacenter, service, tag string, req, resp *dn
return return
} }
// Filter out any service nodes due to health checks
out = d.filterServiceNodes(out)
// Add various responses depending on the request // Add various responses depending on the request
qType := req.Question[0].Qtype qType := req.Question[0].Qtype
if qType == dns.TypeANY || qType == dns.TypeA { if qType == dns.TypeANY || qType == dns.TypeA {
@ -325,6 +328,24 @@ func (d *DNSServer) serviceLookup(datacenter, service, tag string, req, resp *dn
} }
} }
// filterServiceNodes is used to filter out nodes that are failing
// health checks to prevent routing to unhealthy nodes.
//
// A node is dropped as soon as one of its checks reports
// structs.HealthCritical. The filtering is done in place: the input slice's
// backing array is modified, dropped slots are zeroed, and the relative order
// of the surviving nodes is not preserved. The returned slice is a prefix of
// the input containing only the nodes that were kept.
func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes {
	n := len(nodes)
OUTER:
	for i := 0; i < n; i++ {
		node := nodes[i]
		for _, check := range node.Checks {
			if check.Status == structs.HealthCritical {
				d.logger.Printf("[WARN] dns: node '%s' failing health check '%s: %s', dropping from service '%s'",
					node.Node.Node, check.CheckID, check.Name, node.Service.Service)
				// Swap-remove: move the last live node into this slot and
				// zero the vacated tail slot.
				nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{}
				n--
				// The node just moved into slot i has not been inspected
				// yet, so revisit this index on the next iteration.
				i--
				// Stop scanning the dropped node's remaining checks;
				// otherwise each further critical check would evict
				// another, unrelated node.
				continue OUTER
			}
		}
	}
	return nodes[:n]
}
// serviceARecords is used to add the A records for a service lookup // serviceARecords is used to add the A records for a service lookup
func (d *DNSServer) serviceARecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) { func (d *DNSServer) serviceARecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) {
handled := make(map[string]struct{}) handled := make(map[string]struct{})

View File

@ -308,3 +308,47 @@ func TestDNS_Recurse(t *testing.T) {
t.Fatalf("Bad: %#v", in) t.Fatalf("Bad: %#v", in)
} }
} }
// TestDNS_ServiceLookup_FilterCritical registers the "db" service on node
// "foo" together with a check in the critical state, then issues an ANY query
// for the service and expects the DNS response to carry no answers.
func TestDNS_ServiceLookup_FilterCritical(t *testing.T) {
	dir, srv := makeDNSServer(t)
	defer os.RemoveAll(dir)
	defer srv.agent.Shutdown()

	// Wait for leader
	time.Sleep(100 * time.Millisecond)

	// Describe the service and its failing check separately, then
	// combine them into a single catalog registration.
	service := &structs.NodeService{
		Service: "db",
		Tag:     "master",
		Port:    12345,
	}
	failing := &structs.HealthCheck{
		CheckID: "serf",
		Name:    "serf",
		Status:  structs.HealthCritical,
	}
	registration := &structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Address:    "127.0.0.1",
		Service:    service,
		Check:      failing,
	}

	var reply struct{}
	err := srv.agent.RPC("Catalog.Register", registration, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query the service through the DNS interface
	query := new(dns.Msg)
	query.SetQuestion("db.service.consul.", dns.TypeANY)

	client := new(dns.Client)
	resp, _, err := client.Exchange(query, srv.agent.config.DNSAddr)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// The only registered node is failing, so nothing should be returned
	if len(resp.Answer) > 0 {
		t.Fatalf("Bad: %#v", resp)
	}
}