Add DNS recursor strategy option (#10611)

This change adds a new `dns_config.recursor_strategy` option which
controls how Consul queries DNS resolvers listed in the `recursors`
config option. The supported options are `sequential` (default), and
`random`.

Closes #8807

Co-authored-by: Blake Covarrubias <blake@covarrubi.as>
Co-authored-by: Priyanka Sengupta <psengupta@flatiron.com>
This commit is contained in:
Blake Covarrubias 2021-07-19 15:22:51 -07:00 committed by GitHub
parent 4d2bc76d62
commit 441a6c9969
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 127 additions and 19 deletions

3
.changelog/10611.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
config: add `dns_config.recursor_strategy` flag to control the order in which DNS recursors are queried
```

View File

@ -908,6 +908,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL), DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL),
DNSOnlyPassing: boolVal(c.DNS.OnlyPassing), DNSOnlyPassing: boolVal(c.DNS.OnlyPassing),
DNSPort: dnsPort, DNSPort: dnsPort,
DNSRecursorStrategy: b.dnsRecursorStrategyVal(stringVal(c.DNS.RecursorStrategy)),
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout), DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
DNSRecursors: dnsRecursors, DNSRecursors: dnsRecursors,
DNSServiceTTL: dnsServiceTTL, DNSServiceTTL: dnsServiceTTL,
@ -1745,6 +1746,20 @@ func (b *builder) meshGatewayConfVal(mgConf *MeshGatewayConfig) structs.MeshGate
return cfg return cfg
} }
// dnsRecursorStrategyVal converts the raw dns_config.recursor_strategy string
// into a dns.RecursorStrategy.
//
// An empty string is treated the same as "sequential". Any value that is
// neither empty nor one of the known strategies appends an error to b.err and
// yields the zero-value strategy.
func (b *builder) dnsRecursorStrategyVal(v string) dns.RecursorStrategy {
	strategy := dns.RecursorStrategy(v)

	switch strategy {
	case "", dns.RecursorStrategySequential:
		// Unset falls back to the sequential strategy.
		return dns.RecursorStrategySequential
	case dns.RecursorStrategyRandom:
		return dns.RecursorStrategyRandom
	}

	// Unknown value: record the problem on the builder instead of failing
	// here, and hand back the zero value.
	b.err = multierror.Append(b.err, fmt.Errorf("dns_config.recursor_strategy: invalid strategy: %q", v))
	var unset dns.RecursorStrategy
	return unset
}
func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig { func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig {
var out structs.ExposeConfig var out structs.ExposeConfig
if v == nil { if v == nil {

View File

@ -634,6 +634,7 @@ type DNS struct {
MaxStale *string `mapstructure:"max_stale"` MaxStale *string `mapstructure:"max_stale"`
NodeTTL *string `mapstructure:"node_ttl"` NodeTTL *string `mapstructure:"node_ttl"`
OnlyPassing *bool `mapstructure:"only_passing"` OnlyPassing *bool `mapstructure:"only_passing"`
RecursorStrategy *string `mapstructure:"recursor_strategy"`
RecursorTimeout *string `mapstructure:"recursor_timeout"` RecursorTimeout *string `mapstructure:"recursor_timeout"`
ServiceTTL map[string]string `mapstructure:"service_ttl"` ServiceTTL map[string]string `mapstructure:"service_ttl"`
UDPAnswerLimit *int `mapstructure:"udp_answer_limit"` UDPAnswerLimit *int `mapstructure:"udp_answer_limit"`

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api" "github.com/hashicorp/consul/api"
@ -270,6 +271,15 @@ type RuntimeConfig struct {
// hcl: dns_config { only_passing = (true|false) } // hcl: dns_config { only_passing = (true|false) }
DNSOnlyPassing bool DNSOnlyPassing bool
// DNSRecursorStrategy controls the order in which DNS recursors are queried.
// 'sequential' queries recursors in the order they are listed under `recursors`.
// 'random' causes random selection of recursors which has the effect of
// spreading the query load among all listed servers, rather than having
// client agents try the first server in the list every time.
//
// hcl: dns_config { recursor_strategy = "(random|sequential)" }
DNSRecursorStrategy dns.RecursorStrategy
// DNSRecursorTimeout specifies the timeout in seconds // DNSRecursorTimeout specifies the timeout in seconds
// for Consul's internal dns client used for recursion. // for Consul's internal dns client used for recursion.
// This value is used for the connection, read and write timeout. // This value is used for the connection, read and write timeout.

View File

@ -5425,6 +5425,7 @@ func TestLoad_FullConfig(t *testing.T) {
DNSNodeTTL: 7084 * time.Second, DNSNodeTTL: 7084 * time.Second,
DNSOnlyPassing: true, DNSOnlyPassing: true,
DNSPort: 7001, DNSPort: 7001,
DNSRecursorStrategy: "sequential",
DNSRecursorTimeout: 4427 * time.Second, DNSRecursorTimeout: 4427 * time.Second,
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"}, DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0}, DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},

View File

@ -147,6 +147,7 @@
"DNSNodeTTL": "0s", "DNSNodeTTL": "0s",
"DNSOnlyPassing": false, "DNSOnlyPassing": false,
"DNSPort": 0, "DNSPort": 0,
"DNSRecursorStrategy": "",
"DNSRecursorTimeout": "0s", "DNSRecursorTimeout": "0s",
"DNSRecursors": [], "DNSRecursors": [],
"DNSSOA": { "DNSSOA": {

View File

@ -70,22 +70,23 @@ type dnsSOAConfig struct {
} }
type dnsConfig struct { type dnsConfig struct {
AllowStale bool AllowStale bool
Datacenter string Datacenter string
EnableTruncate bool EnableTruncate bool
MaxStale time.Duration MaxStale time.Duration
UseCache bool UseCache bool
CacheMaxAge time.Duration CacheMaxAge time.Duration
NodeName string NodeName string
NodeTTL time.Duration NodeTTL time.Duration
OnlyPassing bool OnlyPassing bool
RecursorTimeout time.Duration RecursorStrategy agentdns.RecursorStrategy
Recursors []string RecursorTimeout time.Duration
SegmentName string Recursors []string
UDPAnswerLimit int SegmentName string
ARecordLimit int UDPAnswerLimit int
NodeMetaTXT bool ARecordLimit int
SOAConfig dnsSOAConfig NodeMetaTXT bool
SOAConfig dnsSOAConfig
// TTLRadix sets service TTLs by prefix, eg: "database-*" // TTLRadix sets service TTLs by prefix, eg: "database-*"
TTLRadix *radix.Tree TTLRadix *radix.Tree
// TTLStict sets TTLs to service by full name match. It Has higher priority than TTLRadix // TTLStict sets TTLs to service by full name match. It Has higher priority than TTLRadix
@ -154,6 +155,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
NodeName: conf.NodeName, NodeName: conf.NodeName,
NodeTTL: conf.DNSNodeTTL, NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing, OnlyPassing: conf.DNSOnlyPassing,
RecursorStrategy: conf.DNSRecursorStrategy,
RecursorTimeout: conf.DNSRecursorTimeout, RecursorTimeout: conf.DNSRecursorTimeout,
SegmentName: conf.SegmentName, SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit, UDPAnswerLimit: conf.DNSUDPAnswerLimit,
@ -1851,7 +1853,8 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
var r *dns.Msg var r *dns.Msg
var rtt time.Duration var rtt time.Duration
var err error var err error
for _, recursor := range cfg.Recursors { for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(req, recursor) r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code // Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) { if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
@ -1936,7 +1939,8 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
var r *dns.Msg var r *dns.Msg
var rtt time.Duration var rtt time.Duration
var err error var err error
for _, recursor := range cfg.Recursors { for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(m, recursor) r, rtt, err = c.Exchange(m, recursor)
if err == nil { if err == nil {
d.logger.Debug("cname recurse RTT for name", d.logger.Debug("cname recurse RTT for name",

View File

@ -1,6 +1,9 @@
package dns package dns
import "regexp" import (
"math/rand"
"regexp"
)
// MaxLabelLength is the maximum length for a name that can be used in DNS. // MaxLabelLength is the maximum length for a name that can be used in DNS.
const MaxLabelLength = 63 const MaxLabelLength = 63
@ -8,3 +11,24 @@ const MaxLabelLength = 63
// InvalidNameRe is a regex that matches characters which can not be included in // InvalidNameRe is a regex that matches characters which can not be included in
// a DNS name. // a DNS name.
var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`) var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`)
// RecursorStrategy names the order in which a list of DNS recursors is
// visited.
type RecursorStrategy string

const (
	// RecursorStrategySequential visits recursors in ascending index order.
	RecursorStrategySequential RecursorStrategy = "sequential"
	// RecursorStrategyRandom visits recursors in a pseudo-random order.
	RecursorStrategyRandom RecursorStrategy = "random"
)

// Indexes returns the indexes 0..max-1 in the order dictated by the strategy.
//
// For RecursorStrategyRandom the result is a pseudo-random permutation
// produced by math/rand. Every other value, including the empty string,
// yields the indexes in ascending order. The returned slice always has
// length max.
func (s RecursorStrategy) Indexes(max int) []int {
	if s == RecursorStrategyRandom {
		return rand.Perm(max)
	}

	order := make([]int, max)
	for pos := 0; pos < len(order); pos++ {
		order[pos] = pos
	}
	return order
}

40
agent/dns/dns_test.go Normal file
View File

@ -0,0 +1,40 @@
package dns
import (
"testing"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
)
// TestDNS_Recursor_StrategyRandom checks that the "random" strategy yields
// every configured recursor exactly once, in an order that differs from the
// configured one.
func TestDNS_Recursor_StrategyRandom(t *testing.T) {
	upstreams := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
	strategy := RecursorStrategy("random")

	retry.RunWith(&retry.Counter{Count: 5}, t, func(r *retry.R) {
		order := strategy.Indexes(len(upstreams))

		// Materialize the recursors in the order the strategy picked.
		picked := make([]string, len(order))
		for pos, idx := range order {
			picked[pos] = upstreams[idx]
		}

		// Same set of elements, regardless of order. Reported on t.
		require.ElementsMatch(t, upstreams, picked)

		// A shuffle may legitimately produce the original order, so this
		// assertion is reported on the retry handle r rather than on t.
		require.NotEqual(r, upstreams, picked)
	})
}
// TestDNS_Recursor_StrategySequential checks that the "sequential" strategy
// visits recursors in exactly the order in which they were configured.
func TestDNS_Recursor_StrategySequential(t *testing.T) {
	expectedRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
	recursorStrategy := RecursorStrategy("sequential")

	recursorsToQuery := make([]string, 0, len(expectedRecursors))
	for _, idx := range recursorStrategy.Indexes(len(expectedRecursors)) {
		recursorsToQuery = append(recursorsToQuery, expectedRecursors[idx])
	}

	// The list of recursors should match the order in which they were defined
	// in the configuration. require.Equal takes (expected, actual); passing
	// them in that order keeps the failure output correctly labeled.
	require.Equal(t, expectedRecursors, recursorsToQuery)
}

View File

@ -7610,6 +7610,7 @@ func TestDNS_ConfigReload(t *testing.T) {
} }
enable_truncate = false enable_truncate = false
only_passing = false only_passing = false
recursor_strategy = "sequential"
recursor_timeout = "15s" recursor_timeout = "15s"
disable_compression = false disable_compression = false
a_record_limit = 1 a_record_limit = 1
@ -7628,6 +7629,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers { for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig) cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors) require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("sequential"), cfg.RecursorStrategy)
require.False(t, cfg.AllowStale) require.False(t, cfg.AllowStale)
require.Equal(t, 20*time.Second, cfg.MaxStale) require.Equal(t, 20*time.Second, cfg.MaxStale)
require.Equal(t, 10*time.Second, cfg.NodeTTL) require.Equal(t, 10*time.Second, cfg.NodeTTL)
@ -7658,6 +7660,7 @@ func TestDNS_ConfigReload(t *testing.T) {
} }
newCfg.DNSEnableTruncate = true newCfg.DNSEnableTruncate = true
newCfg.DNSOnlyPassing = true newCfg.DNSOnlyPassing = true
newCfg.DNSRecursorStrategy = "random"
newCfg.DNSRecursorTimeout = 16 * time.Second newCfg.DNSRecursorTimeout = 16 * time.Second
newCfg.DNSDisableCompression = true newCfg.DNSDisableCompression = true
newCfg.DNSARecordLimit = 2 newCfg.DNSARecordLimit = 2
@ -7673,6 +7676,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers { for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig) cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors) require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("random"), cfg.RecursorStrategy)
require.True(t, cfg.AllowStale) require.True(t, cfg.AllowStale)
require.Equal(t, 21*time.Second, cfg.MaxStale) require.Equal(t, 21*time.Second, cfg.MaxStale)
require.Equal(t, 11*time.Second, cfg.NodeTTL) require.Equal(t, 11*time.Second, cfg.NodeTTL)

View File

@ -1357,6 +1357,11 @@ bind_addr = "{{ GetPrivateInterfaces | include \"network\" \"10.0.0.0/8\" | attr
then all services on that node will be excluded because they are also considered then all services on that node will be excluded because they are also considered
critical. critical.
- `recursor_strategy` - If set to `sequential`, Consul will query recursors in the
order listed in the [`recursors`](#recursors) option. If set to `random`,
Consul will query the upstream DNS resolvers in a random order. Defaults to
`sequential`.
- `recursor_timeout` - Timeout used by Consul when - `recursor_timeout` - Timeout used by Consul when
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later. recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.