Add DNS recursor strategy option (#10611)

This change adds a new `dns_config.recursor_strategy` option which
controls how Consul queries DNS resolvers listed in the `recursors`
config option. The supported values are `sequential` (default) and
`random`.

Closes #8807

Co-authored-by: Blake Covarrubias <blake@covarrubi.as>
Co-authored-by: Priyanka Sengupta <psengupta@flatiron.com>
This commit is contained in:
Blake Covarrubias 2021-07-19 15:22:51 -07:00 committed by GitHub
parent 4d2bc76d62
commit 441a6c9969
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 127 additions and 19 deletions

3
.changelog/10611.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
config: add `dns_config.recursor_strategy` option to control the order in which DNS recursors are queried
```

View File

@ -908,6 +908,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL),
DNSOnlyPassing: boolVal(c.DNS.OnlyPassing),
DNSPort: dnsPort,
DNSRecursorStrategy: b.dnsRecursorStrategyVal(stringVal(c.DNS.RecursorStrategy)),
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
DNSRecursors: dnsRecursors,
DNSServiceTTL: dnsServiceTTL,
@ -1745,6 +1746,20 @@ func (b *builder) meshGatewayConfVal(mgConf *MeshGatewayConfig) structs.MeshGate
return cfg
}
// dnsRecursorStrategyVal converts the raw dns_config.recursor_strategy string
// into a dns.RecursorStrategy.
//
// "random" maps to dns.RecursorStrategyRandom; "sequential" and the empty
// string both map to dns.RecursorStrategySequential. Any other value appends
// an error to b.err and the zero-value strategy is returned.
func (b *builder) dnsRecursorStrategyVal(v string) dns.RecursorStrategy {
	strategy := dns.RecursorStrategy(v)

	if strategy == dns.RecursorStrategyRandom {
		return dns.RecursorStrategyRandom
	}

	// An unset option (empty string) is treated the same as "sequential".
	if strategy == dns.RecursorStrategySequential || strategy == "" {
		return dns.RecursorStrategySequential
	}

	// Unknown value: record the problem on the builder and hand back the
	// zero value rather than guessing at a strategy.
	b.err = multierror.Append(b.err, fmt.Errorf("dns_config.recursor_strategy: invalid strategy: %q", v))
	var unset dns.RecursorStrategy
	return unset
}
func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig {
var out structs.ExposeConfig
if v == nil {

View File

@ -634,6 +634,7 @@ type DNS struct {
MaxStale *string `mapstructure:"max_stale"`
NodeTTL *string `mapstructure:"node_ttl"`
OnlyPassing *bool `mapstructure:"only_passing"`
RecursorStrategy *string `mapstructure:"recursor_strategy"`
RecursorTimeout *string `mapstructure:"recursor_timeout"`
ServiceTTL map[string]string `mapstructure:"service_ttl"`
UDPAnswerLimit *int `mapstructure:"udp_answer_limit"`

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
@ -270,6 +271,15 @@ type RuntimeConfig struct {
// hcl: dns_config { only_passing = (true|false) }
DNSOnlyPassing bool
// DNSRecursorStrategy controls the order in which DNS recursors are queried.
// 'sequential' queries recursors in the order they are listed under `recursors`.
// 'random' causes random selection of recursors which has the effect of
// spreading the query load among all listed servers, rather than having
// client agents try the first server in the list every time.
//
// hcl: dns_config { recursor_strategy = "(random|sequential)" }
DNSRecursorStrategy dns.RecursorStrategy
// DNSRecursorTimeout specifies the timeout in seconds
// for Consul's internal dns client used for recursion.
// This value is used for the connection, read and write timeout.

View File

@ -5425,6 +5425,7 @@ func TestLoad_FullConfig(t *testing.T) {
DNSNodeTTL: 7084 * time.Second,
DNSOnlyPassing: true,
DNSPort: 7001,
DNSRecursorStrategy: "sequential",
DNSRecursorTimeout: 4427 * time.Second,
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},

View File

@ -147,6 +147,7 @@
"DNSNodeTTL": "0s",
"DNSOnlyPassing": false,
"DNSPort": 0,
"DNSRecursorStrategy": "",
"DNSRecursorTimeout": "0s",
"DNSRecursors": [],
"DNSSOA": {

View File

@ -79,6 +79,7 @@ type dnsConfig struct {
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorStrategy agentdns.RecursorStrategy
RecursorTimeout time.Duration
Recursors []string
SegmentName string
@ -154,6 +155,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
NodeName: conf.NodeName,
NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing,
RecursorStrategy: conf.DNSRecursorStrategy,
RecursorTimeout: conf.DNSRecursorTimeout,
SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
@ -1851,7 +1853,8 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
@ -1936,7 +1939,8 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(m, recursor)
if err == nil {
d.logger.Debug("cname recurse RTT for name",

View File

@ -1,6 +1,9 @@
package dns
import "regexp"
import (
"math/rand"
"regexp"
)
// MaxLabelLength is the maximum length for a name that can be used in DNS.
const MaxLabelLength = 63
@ -8,3 +11,24 @@ const MaxLabelLength = 63
// InvalidNameRe is a regex that matches characters which can not be included in
// a DNS name.
var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`)
// RecursorStrategy names the order in which a list of DNS recursors is
// visited.
type RecursorStrategy string

const (
	// RecursorStrategySequential visits recursors in their listed order.
	RecursorStrategySequential RecursorStrategy = "sequential"
	// RecursorStrategyRandom visits recursors in a pseudo-random order.
	RecursorStrategyRandom RecursorStrategy = "random"
)

// Indexes returns the integers in the half-open range [0, max), ordered
// according to the strategy.
//
// For RecursorStrategyRandom the result is a pseudo-random permutation
// produced by rand.Perm. Every other value, including the empty string and
// unrecognized strategies, yields the ascending sequence 0, 1, ..., max-1.
func (s RecursorStrategy) Indexes(max int) []int {
	if s == RecursorStrategyRandom {
		return rand.Perm(max)
	}

	// Sequential (and fallback) ordering: each slot holds its own position.
	order := make([]int, max)
	for pos := 0; pos < len(order); pos++ {
		order[pos] = pos
	}
	return order
}

40
agent/dns/dns_test.go Normal file
View File

@ -0,0 +1,40 @@
package dns
import (
"testing"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
)
// TestDNS_Recursor_StrategyRandom checks that the "random" strategy visits
// every configured recursor exactly once, and that the visiting order differs
// from the configured order.
func TestDNS_Recursor_StrategyRandom(t *testing.T) {
	configuredRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
	recursorStrategy := RecursorStrategy("random")

	// A random permutation can legitimately come out in the configured
	// order, so the order check is retried a few times before failing.
	retry.RunWith(&retry.Counter{Count: 5}, t, func(r *retry.R) {
		recursorsToQuery := make([]string, 0, len(configuredRecursors))
		for _, idx := range recursorStrategy.Indexes(len(configuredRecursors)) {
			recursorsToQuery = append(recursorsToQuery, configuredRecursors[idx])
		}

		// Ensure the slices contain the same elements. Assertions inside the
		// retry closure report through r, not the outer t, so that failures
		// are handled by the retry wrapper.
		require.ElementsMatch(r, configuredRecursors, recursorsToQuery)

		// Ensure the elements are not in the same order
		require.NotEqual(r, configuredRecursors, recursorsToQuery)
	})
}
// TestDNS_Recursor_StrategySequential checks that the "sequential" strategy
// visits recursors in exactly the order they were configured.
func TestDNS_Recursor_StrategySequential(t *testing.T) {
	expectedRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
	recursorStrategy := RecursorStrategy("sequential")

	recursorsToQuery := make([]string, 0, len(expectedRecursors))
	for _, idx := range recursorStrategy.Indexes(len(expectedRecursors)) {
		recursorsToQuery = append(recursorsToQuery, expectedRecursors[idx])
	}

	// The list of recursors should match the order in which they were defined
	// in the configuration. require.Equal takes (expected, actual); keeping
	// that order makes failure output label the two values correctly.
	require.Equal(t, expectedRecursors, recursorsToQuery)
}

View File

@ -7610,6 +7610,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
enable_truncate = false
only_passing = false
recursor_strategy = "sequential"
recursor_timeout = "15s"
disable_compression = false
a_record_limit = 1
@ -7628,6 +7629,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("sequential"), cfg.RecursorStrategy)
require.False(t, cfg.AllowStale)
require.Equal(t, 20*time.Second, cfg.MaxStale)
require.Equal(t, 10*time.Second, cfg.NodeTTL)
@ -7658,6 +7660,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
newCfg.DNSEnableTruncate = true
newCfg.DNSOnlyPassing = true
newCfg.DNSRecursorStrategy = "random"
newCfg.DNSRecursorTimeout = 16 * time.Second
newCfg.DNSDisableCompression = true
newCfg.DNSARecordLimit = 2
@ -7673,6 +7676,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("random"), cfg.RecursorStrategy)
require.True(t, cfg.AllowStale)
require.Equal(t, 21*time.Second, cfg.MaxStale)
require.Equal(t, 11*time.Second, cfg.NodeTTL)

View File

@ -1357,6 +1357,11 @@ bind_addr = "{{ GetPrivateInterfaces | include \"network\" \"10.0.0.0/8\" | attr
then all services on that node will be excluded because they are also considered
critical.
- `recursor_strategy` - If set to `sequential`, Consul will query recursors in the
order listed in the [`recursors`](#recursors) option. If set to `random`,
Consul will query upstream DNS resolvers in a random order. Defaults to
`sequential`.
- `recursor_timeout` - Timeout used by Consul when
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.