From eb4bc7911882dfaa5346db7e0e094ad5bd2f22c8 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 10 May 2018 10:16:38 -0500 Subject: [PATCH] Make raft snapshot commit threshold configurable --- agent/agent.go | 3 +++ agent/config/builder.go | 1 + agent/config/config.go | 2 ++ agent/config/runtime.go | 6 ++++++ agent/config/runtime_test.go | 4 ++++ agent/consul/config.go | 3 +++ website/source/docs/agent/options.html.md | 6 ++++++ 7 files changed, 25 insertions(+) diff --git a/agent/agent.go b/agent/agent.go index 5da86230f..3b77878b1 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -763,6 +763,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) { if a.config.RaftProtocol != 0 { base.RaftConfig.ProtocolVersion = raft.ProtocolVersion(a.config.RaftProtocol) } + if a.config.RaftSnapshotThreshold != 0 { + base.RaftConfig.SnapshotThreshold = uint64(a.config.RaftSnapshotThreshold) + } if a.config.ACLMasterToken != "" { base.ACLMasterToken = a.config.ACLMasterToken } diff --git a/agent/config/builder.go b/agent/config/builder.go index b3902fc20..9a2bdc1d4 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -673,6 +673,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { RPCProtocol: b.intVal(c.RPCProtocol), RPCRateLimit: rate.Limit(b.float64Val(c.Limits.RPCRate)), RaftProtocol: b.intVal(c.RaftProtocol), + RaftSnapshotThreshold: b.intVal(c.RaftSnapshotThreshold), ReconnectTimeoutLAN: b.durationVal("reconnect_timeout", c.ReconnectTimeoutLAN), ReconnectTimeoutWAN: b.durationVal("reconnect_timeout_wan", c.ReconnectTimeoutWAN), RejoinAfterLeave: b.boolVal(c.RejoinAfterLeave), diff --git a/agent/config/config.go b/agent/config/config.go index 3468eb575..58d585ace 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -194,6 +194,7 @@ type Config struct { Ports Ports `json:"ports,omitempty" hcl:"ports" mapstructure:"ports"` RPCProtocol *int `json:"protocol,omitempty" hcl:"protocol" mapstructure:"protocol"` RaftProtocol *int 
`json:"raft_protocol,omitempty" hcl:"raft_protocol" mapstructure:"raft_protocol"` + RaftSnapshotThreshold *int `json:"raft_snapshot_threshold,omitempty" hcl:"raft_snapshot_threshold" mapstructure:"raft_snapshot_threshold"` ReconnectTimeoutLAN *string `json:"reconnect_timeout,omitempty" hcl:"reconnect_timeout" mapstructure:"reconnect_timeout"` ReconnectTimeoutWAN *string `json:"reconnect_timeout_wan,omitempty" hcl:"reconnect_timeout_wan" mapstructure:"reconnect_timeout_wan"` RejoinAfterLeave *bool `json:"rejoin_after_leave,omitempty" hcl:"rejoin_after_leave" mapstructure:"rejoin_after_leave"` @@ -264,6 +265,7 @@ type Consul struct { ElectionTimeout *string `json:"election_timeout,omitempty" hcl:"election_timeout" mapstructure:"election_timeout"` HeartbeatTimeout *string `json:"heartbeat_timeout,omitempty" hcl:"heartbeat_timeout" mapstructure:"heartbeat_timeout"` LeaderLeaseTimeout *string `json:"leader_lease_timeout,omitempty" hcl:"leader_lease_timeout" mapstructure:"leader_lease_timeout"` + SnapshotThreshold *int `json:"snapshot_threshold,omitempty" hcl:"snapshot_threshold" mapstructure:"snapshot_threshold"` } `json:"raft,omitempty" hcl:"raft" mapstructure:"raft"` SerfLAN struct { diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 4481a667a..fe1e315f2 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -899,6 +899,12 @@ type RuntimeConfig struct { // hcl: raft_protocol = int RaftProtocol int + // RaftSnapshotThreshold sets the minimum threshold of raft commits after which + // a snapshot is created. Defaults to 8192 + // + // hcl: raft_snapshot_threshold = int + RaftSnapshotThreshold int + // ReconnectTimeoutLAN specifies the amount of time to wait to reconnect with // another agent before deciding it's permanently gone. This can be used to // control the time it takes to reap failed nodes from the cluster. 
diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index f1376e6d4..d146fd52c 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -2421,6 +2421,7 @@ func TestFullConfig(t *testing.T) { }, "protocol": 30793, "raft_protocol": 19016, + "raft_snapshot_threshold": 16384, "reconnect_timeout": "23739s", "reconnect_timeout_wan": "26694s", "recursors": [ "63.38.39.58", "92.49.18.18" ], @@ -2852,6 +2853,7 @@ func TestFullConfig(t *testing.T) { } protocol = 30793 raft_protocol = 19016 + raft_snapshot_threshold = 16384 reconnect_timeout = "23739s" reconnect_timeout_wan = "26694s" recursors = [ "63.38.39.58", "92.49.18.18" ] @@ -3409,6 +3411,7 @@ func TestFullConfig(t *testing.T) { RPCRateLimit: 12029.43, RPCMaxBurst: 44848, RaftProtocol: 19016, + RaftSnapshotThreshold: 16384, ReconnectTimeoutLAN: 23739 * time.Second, ReconnectTimeoutWAN: 26694 * time.Second, RejoinAfterLeave: true, @@ -4089,6 +4092,7 @@ func TestSanitize(t *testing.T) { "RPCProtocol": 0, "RPCRateLimit": 0, "RaftProtocol": 0, + "RaftSnapshotThreshold": 0, "ReconnectTimeoutLAN": "0s", "ReconnectTimeoutWAN": "0s", "RejoinAfterLeave": false, diff --git a/agent/consul/config.go b/agent/consul/config.go index a8a7f249b..ef7b15343 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -451,6 +451,9 @@ func DefaultConfig() *Config { // Check every 5 seconds to see if there are enough new entries for a snapshot conf.RaftConfig.SnapshotInterval = 5 * time.Second + // Snapshots are created every 8192 entries by default, can be overridden + conf.RaftConfig.SnapshotThreshold = 8192 + return conf } diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index e5f8e464f..1d9989238 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -359,6 +359,9 @@ will exit with an error at startup. 
[Raft Protocol Version Compatibility](/docs/upgrade-specific.html#raft-protocol-version-compatibility) for more details. +* `-raft-snapshot-threshold` - This + controls the minimum number of raft commit entries between snapshots that are saved to disk. + * `-recursor` - Specifies the address of an upstream DNS server. This option may be provided multiple times, and is functionally equivalent to the [`recursors` configuration option](#recursors). @@ -935,6 +938,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass * `raft_protocol` Equivalent to the [`-raft-protocol` command-line flag](#_raft_protocol). +* `raft_snapshot_threshold` Equivalent to the + [`-raft-snapshot-threshold` command-line flag](#_raft_snapshot_threshold). + * `reap` This controls Consul's automatic reaping of child processes, which is useful if Consul is running as PID 1 in a Docker container. If this isn't specified, then Consul will automatically reap child processes if it detects it is running as PID 1. If this is set to true or false, then