Merge pull request #4105 from hashicorp/f-raft-threshold-config

Make raft snapshot commit threshold configurable
2018-05-11 10:47:03 -05:00 · 2018-05-11 10:47:03 -05:00 · 78d54a4be9
parent 0366d5be37 7400a78f8a
commit 78d54a4be9
8 changed files with 65 additions and 2 deletions
--- a/agent/agent.go
+++ b/agent/agent.go
@ -763,6 +763,12 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
 	if a.config.RaftProtocol != 0 {
 		base.RaftConfig.ProtocolVersion = raft.ProtocolVersion(a.config.RaftProtocol)
 	}
+	if a.config.RaftSnapshotThreshold != 0 {
+		base.RaftConfig.SnapshotThreshold = uint64(a.config.RaftSnapshotThreshold)
+	}
+	if a.config.RaftSnapshotInterval != 0 {
+		base.RaftConfig.SnapshotInterval = a.config.RaftSnapshotInterval
+	}
 	if a.config.ACLMasterToken != "" {
 		base.ACLMasterToken = a.config.ACLMasterToken
 	}
--- a/agent/config/builder.go
+++ b/agent/config/builder.go
@ -673,6 +673,8 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
 		RPCProtocol:                 b.intVal(c.RPCProtocol),
 		RPCRateLimit:                rate.Limit(b.float64Val(c.Limits.RPCRate)),
 		RaftProtocol:                b.intVal(c.RaftProtocol),
+		RaftSnapshotThreshold:       b.intVal(c.RaftSnapshotThreshold),
+		RaftSnapshotInterval:        b.durationVal("raft_snapshot_interval", c.RaftSnapshotInterval),
 		ReconnectTimeoutLAN:         b.durationVal("reconnect_timeout", c.ReconnectTimeoutLAN),
 		ReconnectTimeoutWAN:         b.durationVal("reconnect_timeout_wan", c.ReconnectTimeoutWAN),
 		RejoinAfterLeave:            b.boolVal(c.RejoinAfterLeave),
--- a/agent/config/config.go
+++ b/agent/config/config.go
@ -194,6 +194,8 @@ type Config struct {
 	Ports                       Ports                    `json:"ports,omitempty" hcl:"ports" mapstructure:"ports"`
 	RPCProtocol                 *int                     `json:"protocol,omitempty" hcl:"protocol" mapstructure:"protocol"`
 	RaftProtocol                *int                     `json:"raft_protocol,omitempty" hcl:"raft_protocol" mapstructure:"raft_protocol"`
+	RaftSnapshotThreshold       *int                     `json:"raft_snapshot_threshold,omitempty" hcl:"raft_snapshot_threshold" mapstructure:"raft_snapshot_threshold"`
+	RaftSnapshotInterval        *string                  `json:"raft_snapshot_interval,omitempty" hcl:"raft_snapshot_interval" mapstructure:"raft_snapshot_interval"`
 	ReconnectTimeoutLAN         *string                  `json:"reconnect_timeout,omitempty" hcl:"reconnect_timeout" mapstructure:"reconnect_timeout"`
 	ReconnectTimeoutWAN         *string                  `json:"reconnect_timeout_wan,omitempty" hcl:"reconnect_timeout_wan" mapstructure:"reconnect_timeout_wan"`
 	RejoinAfterLeave            *bool                    `json:"rejoin_after_leave,omitempty" hcl:"rejoin_after_leave" mapstructure:"rejoin_after_leave"`
--- a/agent/config/runtime.go
+++ b/agent/config/runtime.go
@ -899,6 +899,17 @@ type RuntimeConfig struct {
 	// hcl: raft_protocol = int
 	RaftProtocol int

+	// RaftSnapshotThreshold sets the minimum threshold of raft commits after which
+	// a snapshot is created. Defaults to 16384.
+	//
+	// hcl: raft_snapshot_threshold = int
+	RaftSnapshotThreshold int
+
+	// RaftSnapshotInterval sets the interval to use when checking whether to create
+	// a new snapshot. Defaults to 30 seconds.
+	//
+	// hcl: raft_snapshot_interval = "duration"
+	RaftSnapshotInterval time.Duration
+
 	// ReconnectTimeoutLAN specifies the amount of time to wait to reconnect with
 	// another agent before deciding it's permanently gone. This can be used to
 	// control the time it takes to reap failed nodes from the cluster.
--- a/agent/config/runtime_test.go
+++ b/agent/config/runtime_test.go
@ -2421,6 +2421,8 @@ func TestFullConfig(t *testing.T) {
 			},
 			"protocol": 30793,
 			"raft_protocol": 19016,
+			"raft_snapshot_threshold": 16384,
+			"raft_snapshot_interval": "30s",
 			"reconnect_timeout": "23739s",
 			"reconnect_timeout_wan": "26694s",
 			"recursors": [ "63.38.39.58", "92.49.18.18" ],
@ -2852,6 +2854,8 @@ func TestFullConfig(t *testing.T) {
 			}
 			protocol = 30793
 			raft_protocol = 19016
+			raft_snapshot_threshold = 16384
+			raft_snapshot_interval = "30s"
 			reconnect_timeout = "23739s"
 			reconnect_timeout_wan = "26694s"
 			recursors = [ "63.38.39.58", "92.49.18.18" ]
@ -3409,6 +3413,8 @@ func TestFullConfig(t *testing.T) {
 		RPCRateLimit:              12029.43,
 		RPCMaxBurst:               44848,
 		RaftProtocol:              19016,
+		RaftSnapshotThreshold:     16384,
+		RaftSnapshotInterval:      30 * time.Second,
 		ReconnectTimeoutLAN:       23739 * time.Second,
 		ReconnectTimeoutWAN:       26694 * time.Second,
 		RejoinAfterLeave:          true,
@ -4089,6 +4095,8 @@ func TestSanitize(t *testing.T) {
    "RPCProtocol": 0,
    "RPCRateLimit": 0,
    "RaftProtocol": 0,
+    "RaftSnapshotInterval": "0s",
+    "RaftSnapshotThreshold": 0,
    "ReconnectTimeoutLAN": "0s",
    "ReconnectTimeoutWAN": "0s",
    "RejoinAfterLeave": false,
--- a/agent/consul/config.go
+++ b/agent/consul/config.go
@ -448,8 +448,11 @@ func DefaultConfig() *Config {
 	// Disable shutdown on removal
 	conf.RaftConfig.ShutdownOnRemove = false

-	// Check every 5 seconds to see if there are enough new entries for a snapshot
-	conf.RaftConfig.SnapshotInterval = 5 * time.Second
+	// Check every 30 seconds to see if there are enough new entries for a snapshot, can be overridden
+	conf.RaftConfig.SnapshotInterval = 30 * time.Second
+
+	// Snapshots are created every 16384 entries by default, can be overridden
+	conf.RaftConfig.SnapshotThreshold = 16384

 	return conf
 }
--- a/website/source/docs/agent/options.html.md
+++ b/website/source/docs/agent/options.html.md
@ -359,6 +359,20 @@ will exit with an error at startup.
  [Raft Protocol Version Compatibility](/docs/upgrade-specific.html#raft-protocol-version-compatibility)
  for more details.

+* <a name="_raft_snapshot_threshold"></a><a href="#_raft_snapshot_threshold">`-raft-snapshot-threshold`</a> - This controls the
+  minimum number of raft commit entries between snapshots that are saved to disk. This is a low-level parameter that should
+  rarely need to be changed. Very busy clusters experiencing excessive disk IO may increase this value to reduce disk IO, and minimize
+  the chances of all servers taking snapshots at the same time. Increasing this trades off disk IO for disk space since the log will
+  grow much larger and the space in the raft.db file can't be reclaimed till the next snapshot. Servers may take longer to recover from
+  crashes or failover if this is increased significantly as more logs will need to be replayed.
+
+* <a name="_raft_snapshot_interval"></a><a href="#_raft_snapshot_interval">`-raft-snapshot-interval`</a> - This controls how often servers
+  check if they need to save a snapshot to disk. This is a low-level parameter that should rarely need to be changed. Very busy clusters
+  experiencing excessive disk IO may increase this value to reduce disk IO, and minimize the chances of all servers taking snapshots at the same time.
+  Increasing this trades off disk IO for disk space since the log will grow much larger and the space in the raft.db file can't be reclaimed
+  till the next snapshot. Servers may take longer to recover from crashes or failover if this is increased significantly as more logs
+  will need to be replayed.
+
 * <a name="_recursor"></a><a href="#_recursor">`-recursor`</a> - Specifies the address of an upstream DNS
  server. This option may be provided multiple times, and is functionally
  equivalent to the [`recursors` configuration option](#recursors).
@ -935,6 +949,12 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
 * <a name="raft_protocol"></a><a href="#raft_protocol">`raft_protocol`</a> Equivalent to the
  [`-raft-protocol` command-line flag](#_raft_protocol).

+* <a name="raft_snapshot_threshold"></a><a href="#raft_snapshot_threshold">`raft_snapshot_threshold`</a> Equivalent to the
+  [`-raft-snapshot-threshold` command-line flag](#_raft_snapshot_threshold).
+
+* <a name="raft_snapshot_interval"></a><a href="#raft_snapshot_interval">`raft_snapshot_interval`</a> Equivalent to the
+  [`-raft-snapshot-interval` command-line flag](#_raft_snapshot_interval).
+
 * <a name="reap"></a><a href="#reap">`reap`</a> This controls Consul's automatic reaping of child processes,
  which is useful if Consul is running as PID 1 in a Docker container. If this isn't specified, then Consul will
  automatically reap child processes if it detects it is running as PID 1. If this is set to true or false, then
--- a/website/source/docs/upgrade-specific.html.md
+++ b/website/source/docs/upgrade-specific.html.md
@ -26,6 +26,17 @@ The following previously deprecated fields and config options have been removed:
 - The [deprecated set of metric names](/docs/upgrade-specific.html#metric-names-updated) (beginning with `consul.consul.`) has been removed
 along with the `enable_deprecated_names` option from the metrics configuration.

+#### New defaults for Raft Snapshot Creation
+Consul 1.0.1 (and earlier versions of Consul) checked for raft snapshots every
+5 seconds, and created new snapshots for every 8192 writes. These defaults cause
+constant disk IO in large busy clusters. Consul 1.1.0 increases these to larger values (a 30 second check interval and a 16384 write threshold),
+and makes them tunable via the [raft_snapshot_interval](/docs/agent/options.html#_raft_snapshot_interval) and
+[raft_snapshot_threshold](/docs/agent/options.html#_raft_snapshot_threshold) parameters. We recommend
+keeping the new defaults. However, operators can go back to the old defaults by changing their
+config if they prefer more frequent snapshots. See the documentation for [raft_snapshot_interval](/docs/agent/options.html#_raft_snapshot_interval)
+and [raft_snapshot_threshold](/docs/agent/options.html#_raft_snapshot_threshold) to understand the trade-offs
+when tuning these.
+
 ## Consul 1.0.1

 #### Carefully Check and Remove Stale Servers During Rolling Upgrades