Make Raft trailing logs and snapshot timing reloadable (#10129)

* WIP reloadable raft config

* Pre-define new raft gauges

* Update go-metrics to change gauge reset behaviour

* Update raft to pull in new metric and reloadable config

* Add snapshot persistence timing and installSnapshot to our 'protected' list, as they can be infrequent but are important

* Update telemetry docs

* Update config and telemetry docs

* Add note to oldestLogAge on when it is visible

* Add changelog entry

* Update website/content/docs/agent/options.mdx

Co-authored-by: Matt Keeler <mkeeler@users.noreply.github.com>

Paul Banks 2021-05-04 15:36:53 +01:00 committed by GitHub
parent eb84a856c4
commit d47eea3a3f
32 changed files with 805 additions and 258 deletions

.changelog/10129.txt

@ -0,0 +1,4 @@
```release-note:improvement
raft: allow reloading of raft trailing logs and snapshot timing to allow recovery from some [replication failure modes](https://github.com/hashicorp/consul/issues/9609).
telemetry: add metrics and documentation for [monitoring for replication issues](https://consul.io/docs/agent/telemetry#raft-replication-capacity-issues).
```

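The new fields surface in `consul.ReloadableConfig` and are applied by `Server.ReloadConfig`, both shown in the hunks below. A minimal sketch of that call, assuming the `github.com/hashicorp/consul/agent/consul` import path; values are illustrative, and in the real reload path every field is populated from the agent's RuntimeConfig rather than just the raft ones:

```go
package example

import (
	"time"

	"github.com/hashicorp/consul/agent/consul"
)

// reloadRaftTuning sketches how the new reloadable raft settings are passed
// through consul.ReloadableConfig. In the agent this struct is built from the
// freshly parsed RuntimeConfig (see the agent.go hunk below); only the new
// raft fields are spelled out here.
func reloadRaftTuning(srv *consul.Server) error {
	rc := consul.ReloadableConfig{
		RaftSnapshotThreshold: 16384,            // outstanding logs before a snapshot is taken
		RaftSnapshotInterval:  30 * time.Second, // how often to check whether to snapshot
		RaftTrailingLogs:      100000,           // logs kept after a snapshot so followers can catch up
	}
	// Zero-valued raft fields fall back to Consul's compiled-in defaults
	// (see computeRaftReloadableConfig below).
	return srv.ReloadConfig(rc)
}
```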

@ -3639,10 +3639,13 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error {
}
cc := consul.ReloadableConfig{
RPCRateLimit: newCfg.RPCRateLimit,
RPCMaxBurst: newCfg.RPCMaxBurst,
RPCMaxConnsPerClient: newCfg.RPCMaxConnsPerClient,
ConfigEntryBootstrap: newCfg.ConfigEntryBootstrap,
RPCRateLimit: newCfg.RPCRateLimit,
RPCMaxBurst: newCfg.RPCMaxBurst,
RPCMaxConnsPerClient: newCfg.RPCMaxConnsPerClient,
ConfigEntryBootstrap: newCfg.ConfigEntryBootstrap,
RaftSnapshotThreshold: newCfg.RaftSnapshotThreshold,
RaftSnapshotInterval: newCfg.RaftSnapshotInterval,
RaftTrailingLogs: newCfg.RaftTrailingLogs,
}
if err := a.delegate.ReloadConfig(cc); err != nil {
return err


@ -659,8 +659,11 @@ type RPCConfig struct {
// ReloadableConfig is the configuration that is passed to ReloadConfig when
// application config is reloaded.
type ReloadableConfig struct {
RPCRateLimit rate.Limit
RPCMaxBurst int
RPCMaxConnsPerClient int
ConfigEntryBootstrap []structs.ConfigEntry
RPCRateLimit rate.Limit
RPCMaxBurst int
RPCMaxConnsPerClient int
ConfigEntryBootstrap []structs.ConfigEntry
RaftSnapshotThreshold int
RaftSnapshotInterval time.Duration
RaftTrailingLogs int
}


@ -1387,6 +1387,13 @@ func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
// ReloadConfig is used to have the Server do an online reload of
// relevant configuration information
func (s *Server) ReloadConfig(config ReloadableConfig) error {
// Reload raft config first before updating any other state since it could
// error if the new config is invalid.
raftCfg := computeRaftReloadableConfig(config)
if err := s.raft.ReloadConfig(raftCfg); err != nil {
return err
}
s.rpcLimiter.Store(rate.NewLimiter(config.RPCRateLimit, config.RPCMaxBurst))
s.rpcConnLimiter.SetConfig(connlimit.Config{
MaxConnsPerClientIP: config.RPCMaxConnsPerClient,
@ -1401,6 +1408,33 @@ func (s *Server) ReloadConfig(config ReloadableConfig) error {
return nil
}
// computeRaftReloadableConfig works out the correct reloadable config for raft.
// We reload raft even if nothing has changed since it's cheap and simpler than
// trying to work out if it's different from the current raft config. This
// function is separate to make it cheap to table test thoroughly without a full
// raft instance.
func computeRaftReloadableConfig(config ReloadableConfig) raft.ReloadableConfig {
// We use the raw defaults _not_ the current values so that you can reload
// back to a zero value having previously started Consul with a custom value
// for one of these fields.
defaultConf := DefaultConfig()
raftCfg := raft.ReloadableConfig{
TrailingLogs: defaultConf.RaftConfig.TrailingLogs,
SnapshotInterval: defaultConf.RaftConfig.SnapshotInterval,
SnapshotThreshold: defaultConf.RaftConfig.SnapshotThreshold,
}
if config.RaftSnapshotThreshold != 0 {
raftCfg.SnapshotThreshold = uint64(config.RaftSnapshotThreshold)
}
if config.RaftSnapshotInterval != 0 {
raftCfg.SnapshotInterval = config.RaftSnapshotInterval
}
if config.RaftTrailingLogs != 0 {
raftCfg.TrailingLogs = uint64(config.RaftTrailingLogs)
}
return raftCfg
}
// Atomically sets a readiness state flag when leadership is obtained, to indicate that server is past its barrier write
func (s *Server) setConsistentReadReady() {
atomic.StoreInt32(&s.readyForConsistentReads, 1)


@ -14,6 +14,7 @@ import (
"github.com/google/tcpproxy"
"github.com/hashicorp/memberlist"
"github.com/hashicorp/raft"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/ipaddr"
@ -1466,6 +1467,9 @@ func TestServer_ReloadConfig(t *testing.T) {
c.Build = "1.5.0"
c.RPCRateLimit = 500
c.RPCMaxBurst = 5000
// Set one raft param to be non-default in the initial config, others are
// default.
c.RaftConfig.TrailingLogs = 1234
})
defer os.RemoveAll(dir1)
defer s.Shutdown()
@ -1480,6 +1484,14 @@ func TestServer_ReloadConfig(t *testing.T) {
RPCRateLimit: 1000,
RPCMaxBurst: 10000,
ConfigEntryBootstrap: []structs.ConfigEntry{entryInit},
// Reset the custom one to default by removing it from the config file (it
// will be a zero value here).
RaftTrailingLogs: 0,
// Set a different Raft param to something custom now
RaftSnapshotThreshold: 4321,
// Leave other raft fields default
}
require.NoError(t, s.ReloadConfig(rc))
@ -1496,6 +1508,98 @@ func TestServer_ReloadConfig(t *testing.T) {
limiter = s.rpcLimiter.Load().(*rate.Limiter)
require.Equal(t, rate.Limit(1000), limiter.Limit())
require.Equal(t, 10000, limiter.Burst())
// Check raft config
defaults := DefaultConfig()
got := s.raft.ReloadableConfig()
require.Equal(t, uint64(4321), got.SnapshotThreshold,
"should have be reloaded to new value")
require.Equal(t, defaults.RaftConfig.SnapshotInterval, got.SnapshotInterval,
"should have remained the default interval")
require.Equal(t, defaults.RaftConfig.TrailingLogs, got.TrailingLogs,
"should have reloaded to default trailing_logs")
// Now check that updating each of those raft fields separately works correctly
// too.
}
func TestServer_computeRaftReloadableConfig(t *testing.T) {
defaults := DefaultConfig().RaftConfig
cases := []struct {
name string
rc ReloadableConfig
want raft.ReloadableConfig
}{
{
// This case is the common path - reload is called with a ReloadableConfig
// populated from the RuntimeConfig which has zero values for the fields.
// On startup we selectively pick non-zero runtime config fields to
// override defaults so we need to do the same.
name: "Still defaults",
rc: ReloadableConfig{},
want: raft.ReloadableConfig{
SnapshotThreshold: defaults.SnapshotThreshold,
SnapshotInterval: defaults.SnapshotInterval,
TrailingLogs: defaults.TrailingLogs,
},
},
{
name: "Threshold set",
rc: ReloadableConfig{
RaftSnapshotThreshold: 123456,
},
want: raft.ReloadableConfig{
SnapshotThreshold: 123456,
SnapshotInterval: defaults.SnapshotInterval,
TrailingLogs: defaults.TrailingLogs,
},
},
{
name: "interval set",
rc: ReloadableConfig{
RaftSnapshotInterval: 13 * time.Minute,
},
want: raft.ReloadableConfig{
SnapshotThreshold: defaults.SnapshotThreshold,
SnapshotInterval: 13 * time.Minute,
TrailingLogs: defaults.TrailingLogs,
},
},
{
name: "trailing logs set",
rc: ReloadableConfig{
RaftTrailingLogs: 78910,
},
want: raft.ReloadableConfig{
SnapshotThreshold: defaults.SnapshotThreshold,
SnapshotInterval: defaults.SnapshotInterval,
TrailingLogs: 78910,
},
},
{
name: "all set",
rc: ReloadableConfig{
RaftSnapshotThreshold: 123456,
RaftSnapshotInterval: 13 * time.Minute,
RaftTrailingLogs: 78910,
},
want: raft.ReloadableConfig{
SnapshotThreshold: 123456,
SnapshotInterval: 13 * time.Minute,
TrailingLogs: 78910,
},
},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
got := computeRaftReloadableConfig(tc.rc)
require.Equal(t, tc.want, got)
})
}
}
func TestServer_RPC_RateLimit(t *testing.T) {


@ -175,6 +175,19 @@ func registerWithGRPC(b grpcresolver.Builder) {
// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics.
func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) {
// TODO: "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within it. In the meantime, we're going to define a few here because they're key to monitoring Consul.
raftGauges := []prometheus.GaugeDefinition{
{
Name: []string{"raft", "fsm", "lastRestoreDuration"},
Help: "This measures how long the last FSM restore (from disk or leader) took.",
},
{
Name: []string{"raft", "leader", "oldestLogAge"},
Help: "This measures how old the oldest log in the leader's log store is.",
},
}
// Build slice of slices for all gauge definitions
var gauges = [][]prometheus.GaugeDefinition{
cache.Gauges,
@ -185,7 +198,9 @@ func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, [
usagemetrics.Gauges,
consul.ReplicationGauges,
Gauges,
raftGauges,
}
// Flatten definitions
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
var gaugeDefs []prometheus.GaugeDefinition
@ -252,6 +267,14 @@ func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, [
Name: []string{"raft", "leader", "lastContact"},
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
},
{
Name: []string{"raft", "snapshot", "persist"},
Help: "Measures the time it takes raft to write a new snapshot to disk.",
},
{
Name: []string{"raft", "rpc", "installSnapshot"},
Help: "Measures the time it takes the raft leader to install a snapshot on a follower that is catching up after being down or has just joined the cluster.",
},
}
var summaries = [][]prometheus.SummaryDefinition{

go.mod

@ -12,7 +12,7 @@ require (
github.com/Microsoft/go-winio v0.4.3 // indirect
github.com/NYTimes/gziphandler v1.0.1
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e
github.com/armon/go-metrics v0.3.6
github.com/armon/go-metrics v0.3.7
github.com/armon/go-radix v1.0.0
github.com/aws/aws-sdk-go v1.25.41
github.com/coredns/coredns v1.1.2
@ -52,7 +52,7 @@ require (
github.com/hashicorp/mdns v1.0.4 // indirect
github.com/hashicorp/memberlist v0.2.3
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69
github.com/hashicorp/raft v1.2.0
github.com/hashicorp/raft v1.3.0
github.com/hashicorp/raft-autopilot v0.1.2
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea
github.com/hashicorp/serf v0.9.5

go.sum

@ -58,8 +58,8 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs=
github.com/armon/go-metrics v0.3.6 h1:x/tmtOF9cDBoXH7XoAGOz2qqm1DknFD1590XmD/DUJ8=
github.com/armon/go-metrics v0.3.6/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-metrics v0.3.7 h1:c/oCtWzYpboy6+6f6LjXRlyW7NwA2SWf+a9KMlHq/bM=
github.com/armon/go-metrics v0.3.7/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
@ -279,8 +279,9 @@ github.com/hashicorp/memberlist v0.2.3/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69 h1:lc3c72qGlIMDqQpQH82Y4vaglRMMFdJbziYWriR4UcE=
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69/go.mod h1:/z+jUGRBlwVpUZfjute9jWaF6/HuhjuFQuL1YXzVD1Q=
github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.2.0 h1:mHzHIrF0S91d3A7RPBvuqkgB4d/7oFJZyvf1Q4m7GA0=
github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.3.0 h1:Wox4J4R7J2FOJLtTa6hdk0VJfiNUSP32pYoYR738bkE=
github.com/hashicorp/raft v1.3.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM=
github.com/hashicorp/raft-autopilot v0.1.2 h1:yeqdUjWLjVJkBM+mcVxqwxi+w+aHsb9cEON2dz69OCs=
github.com/hashicorp/raft-autopilot v0.1.2/go.mod h1:Af4jZBwaNOI+tXfIqIdbcAnh/UyyqIMj/pOISIfhArw=
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBAS2J0VBzVa5e80b5ZtYuNQtgXjN40qBZlD4=


@ -5,7 +5,6 @@ package prometheus
import (
"fmt"
"log"
"math"
"regexp"
"strings"
"sync"
@ -31,17 +30,16 @@ type PrometheusOpts struct {
Expiration time.Duration
Registerer prometheus.Registerer
// Gauges, Summaries, and Counters allow us to pre-declare metrics by giving their Name, Help, and ConstLabels to
// the PrometheusSink when it is created. Metrics declared in this way will be initialized at zero and will not be
// deleted when their expiry is reached.
// - Gauges and Summaries will be set to NaN when they expire.
// - Counters continue to Collect their last known value.
// Ex:
// PrometheusOpts{
// Gauges, Summaries, and Counters allow us to pre-declare metrics by giving
// their Name, Help, and ConstLabels to the PrometheusSink when it is created.
// Metrics declared in this way will be initialized at zero and will not be
// deleted or altered when their expiry is reached.
//
// Ex: PrometheusOpts{
// Expiration: 10 * time.Second,
// Gauges: []GaugeDefinition{
// {
// Name: []string{ "application", "component", "measurement"},
// Name: []string{ "application", "component", "measurement"},
// Help: "application_component_measurement provides an example of how to declare static metrics",
// ConstLabels: []metrics.Label{ { Name: "my_label", Value: "does_not_change" }, },
// },
@ -139,21 +137,24 @@ func (p *PrometheusSink) Describe(c chan<- *prometheus.Desc) {
// logic to clean up ephemeral metrics if their value haven't been set for a
// duration exceeding our allowed expiration time.
func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
p.collectAtTime(c, time.Now())
}
// collectAtTime allows internal testing of the expiry based logic here without
// mocking clocks or making tests timing sensitive.
func (p *PrometheusSink) collectAtTime(c chan<- prometheus.Metric, t time.Time) {
expire := p.expiration != 0
now := time.Now()
p.gauges.Range(func(k, v interface{}) bool {
if v == nil {
return true
}
g := v.(*gauge)
lastUpdate := g.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if expire && lastUpdate.Add(p.expiration).Before(t) {
if g.canDelete {
p.gauges.Delete(k)
return true
}
// We have not observed the gauge this interval so we don't know its value.
g.Set(math.NaN())
}
g.Collect(c)
return true
@ -164,13 +165,11 @@ func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
}
s := v.(*summary)
lastUpdate := s.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if expire && lastUpdate.Add(p.expiration).Before(t) {
if s.canDelete {
p.summaries.Delete(k)
return true
}
// We have observed nothing in this interval.
s.Observe(math.NaN())
}
s.Collect(c)
return true
@ -181,12 +180,11 @@ func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
}
count := v.(*counter)
lastUpdate := count.updatedAt
if expire && lastUpdate.Add(p.expiration).Before(now) {
if expire && lastUpdate.Add(p.expiration).Before(t) {
if count.canDelete {
p.counters.Delete(k)
return true
}
// Counters remain at their previous value when not observed, so we do not set it to NaN.
}
count.Collect(c)
return true

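For context, the definition slices built in `getPrometheusDefs` above are handed to this sink when the agent's telemetry is initialized, so the new raft gauges exist (at zero) before raft ever emits them and, per the updated comment, are neither deleted nor altered once `Expiration` passes. A minimal sketch, assuming the go-metrics v0.3.7 constructor and field names (`NewPrometheusSinkFrom`, `GaugeDefinitions`):

```go
package example

import (
	"time"

	"github.com/armon/go-metrics/prometheus"
)

// newSink pre-declares the new raft gauges so they are exported as 0 before
// the first observation and survive the expiry pass in Collect above.
func newSink() (*prometheus.PrometheusSink, error) {
	return prometheus.NewPrometheusSinkFrom(prometheus.PrometheusOpts{
		Expiration: 60 * time.Second,
		GaugeDefinitions: []prometheus.GaugeDefinition{
			{
				Name: []string{"raft", "leader", "oldestLogAge"},
				Help: "This measures how old the oldest log in the leader's log store is.",
			},
			{
				Name: []string{"raft", "fsm", "lastRestoreDuration"},
				Help: "This measures how long the last FSM restore (from disk or leader) took.",
			},
		},
	})
}
```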

@ -1,5 +1,21 @@
# UNRELEASED
# 1.3.0 (April 22nd, 2021)
IMPROVEMENTS
* Added metrics for `oldestLogAge` and `lastRestoreDuration` to monitor capacity issues that can cause unrecoverable cluster failure [[GH-452](https://github.com/hashicorp/raft/pull/452)][[GH-454](https://github.com/hashicorp/raft/pull/454/files)]
* Made `TrailingLogs`, `SnapshotInterval` and `SnapshotThreshold` reloadable at runtime using a new `ReloadConfig` method. This allows recovery from cases where there are not enough logs retained for followers to catch up after a restart. [[GH-444](https://github.com/hashicorp/raft/pull/444)]
* Inclusify the repository by switching to main [[GH-446](https://github.com/hashicorp/raft/pull/446)]
* Add option for a buffered `ApplyCh` if `MaxAppendEntries` is enabled [[GH-445](https://github.com/hashicorp/raft/pull/445)]
* Add string to `LogType` for more human readable debugging [[GH-442](https://github.com/hashicorp/raft/pull/442)]
* Extract fuzzy testing into its own module [[GH-459](https://github.com/hashicorp/raft/pull/459)]
BUG FIXES
* Update LogCache `StoreLogs()` to capture an error that would previously cause a panic [[GH-460](https://github.com/hashicorp/raft/pull/460)]
# 1.2.0 (October 5th, 2020)
IMPROVEMENTS
* Remove `StartAsLeader` configuration option [[GH-364](https://github.com/hashicorp/raft/pull/386)]
@ -85,4 +101,4 @@ v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. Th
# 0.1.0 (September 29th, 2017)
v0.1.0 is the original stable version of the library that was in master and has been maintained with no breaking API changes. This was in use by Consul prior to version 0.7.0.
v0.1.0 is the original stable version of the library that was in main and has been maintained with no breaking API changes. This was in use by Consul prior to version 0.7.0.


@ -16,28 +16,28 @@ endif
TEST_RESULTS_DIR?=/tmp/test-results
test:
GOTRACEBACK=all go test $(TESTARGS) -timeout=60s -race .
GOTRACEBACK=all go test $(TESTARGS) -timeout=60s -tags batchtest -race .
GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -race .
GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -tags batchtest -race .
integ: test
INTEG_TESTS=yes go test $(TESTARGS) -timeout=25s -run=Integ .
INTEG_TESTS=yes go test $(TESTARGS) -timeout=25s -tags batchtest -run=Integ .
INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -run=Integ .
INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -tags batchtest -run=Integ .
ci.test-norace:
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -tags batchtest
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -tags batchtest
ci.test:
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -race .
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -race -tags batchtest .
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race .
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race -tags batchtest .
ci.integ: ci.test
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=25s -run=Integ .
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=25s -run=Integ -tags batchtest .
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ .
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ -tags batchtest .
fuzz:
go test $(TESTARGS) -timeout=20m ./fuzzy
go test $(TESTARGS) -timeout=20m -tags batchtest ./fuzzy
cd ./fuzzy && go test $(TESTARGS) -timeout=20m .
cd ./fuzzy && go test $(TESTARGS) -timeout=20m -tags batchtest .
deps:
go get -t -d -v ./...


@ -28,16 +28,21 @@ To prevent complications with cgo, the primary backend `MDBStore` is in a separa
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
A pure Go backend using [Bbolt](https://github.com/etcd-io/bbolt) is also available called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.
## Community Contributed Examples
[Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC
## Tagged Releases
As of September 2017, HashiCorp will start using tags for this library to clearly indicate
major version updates. We recommend you vendor your application's dependency on this library.
* v0.1.0 is the original stable version of the library that was in master and has been maintained
* v0.1.0 is the original stable version of the library that was in main and has been maintained
with no breaking API changes. This was in use by Consul prior to version 0.7.0.
* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version
@ -104,4 +109,3 @@ greatly sacrificing performance.
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.


@ -81,8 +81,15 @@ type Raft struct {
// be committed and applied to the FSM.
applyCh chan *logFuture
// Configuration provided at Raft initialization
conf Config
// conf stores the current configuration to use. This is the most recent one
// provided. All reads of config values should use the config() helper method
// to read this safely.
conf atomic.Value
// confReloadMu ensures that only one thread can reload config at once since
// we need to read-modify-write the atomic. It is NOT necessary to hold this
// for any other operation e.g. reading config using config().
confReloadMu sync.Mutex
// FSM is the client state machine to apply commands to
fsm FSM
@ -199,7 +206,7 @@ type Raft struct {
// server. Any further attempts to bootstrap will return an error that can be
// safely ignored.
//
// One sane approach is to bootstrap a single server with a configuration
// One approach is to bootstrap a single server with a configuration
// listing just itself as a Voter, then invoke AddVoter() on it to add other
// servers to the cluster.
func BootstrapCluster(conf *Config, logs LogStore, stable StableStore,
@ -316,6 +323,12 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore,
continue
}
// Note this is the one place we call fsm.Restore without the
// fsmRestoreAndMeasure wrapper since this function should only be called to
// reset state on disk and the FSM passed will not be used for a running
// server instance. If the same process will eventually become a Raft peer
// then it will call NewRaft and restore from disk again, which will report
// metrics.
err = fsm.Restore(source)
// Close the source after the restore has completed
source.Close()
@ -385,9 +398,9 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore,
return nil
}
// GetConfiguration returns the configuration of the Raft cluster without
// starting a Raft instance or connecting to the cluster
// This function has identical behavior to Raft.GetConfiguration
// GetConfiguration returns the persisted configuration of the Raft cluster
// without starting a Raft instance or connecting to the cluster. This function
// has identical behavior to Raft.GetConfiguration.
func GetConfiguration(conf *Config, fsm FSM, logs LogStore, stable StableStore,
snaps SnapshotStore, trans Transport) (Configuration, error) {
conf.skipStartup = true
@ -486,7 +499,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
// Make sure we have a valid server address and ID.
protocolVersion := conf.ProtocolVersion
localAddr := ServerAddress(trans.LocalAddr())
localAddr := trans.LocalAddr()
localID := conf.LocalID
// TODO (slackpad) - When we deprecate protocol version 2, remove this
@ -495,11 +508,16 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
return nil, fmt.Errorf("when running with ProtocolVersion < 3, LocalID must be set to the network address")
}
// Buffer applyCh to MaxAppendEntries if the option is enabled
applyCh := make(chan *logFuture)
if conf.BatchApplyCh {
applyCh = make(chan *logFuture, conf.MaxAppendEntries)
}
// Create Raft struct.
r := &Raft{
protocolVersion: protocolVersion,
applyCh: make(chan *logFuture),
conf: *conf,
applyCh: applyCh,
fsm: fsm,
fsmMutateCh: make(chan interface{}, 128),
fsmSnapshotCh: make(chan *reqSnapshotFuture),
@ -524,6 +542,8 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
leadershipTransferCh: make(chan *leadershipTransferFuture, 1),
}
r.conf.Store(*conf)
// Initialize as a follower.
r.setState(Follower)
@ -577,23 +597,23 @@ func (r *Raft) restoreSnapshot() error {
// Try to load in order of newest to oldest
for _, snapshot := range snapshots {
if !r.conf.NoSnapshotRestoreOnStart {
if !r.config().NoSnapshotRestoreOnStart {
_, source, err := r.snapshots.Open(snapshot.ID)
if err != nil {
r.logger.Error("failed to open snapshot", "id", snapshot.ID, "error", err)
continue
}
err = r.fsm.Restore(source)
// Close the source after the restore has completed
source.Close()
if err != nil {
if err := fsmRestoreAndMeasure(r.fsm, source); err != nil {
source.Close()
r.logger.Error("failed to restore snapshot", "id", snapshot.ID, "error", err)
continue
}
source.Close()
r.logger.Info("restored from snapshot", "id", snapshot.ID)
}
// Update the lastApplied so we don't replay old logs
r.setLastApplied(snapshot.Index)
@ -624,6 +644,45 @@ func (r *Raft) restoreSnapshot() error {
return nil
}
func (r *Raft) config() Config {
return r.conf.Load().(Config)
}
// ReloadConfig updates the configuration of a running raft node. If the new
// configuration is invalid an error is returned and no changes made to the
// instance. All fields will be copied from rc into the new configuration, even
// if they are zero valued.
func (r *Raft) ReloadConfig(rc ReloadableConfig) error {
r.confReloadMu.Lock()
defer r.confReloadMu.Unlock()
// Load the current config (note we are under a lock so it can't be changed
// between this read and a later Store).
oldCfg := r.config()
// Set the reloadable fields
newCfg := rc.apply(oldCfg)
if err := ValidateConfig(&newCfg); err != nil {
return err
}
r.conf.Store(newCfg)
return nil
}
// ReloadableConfig returns the current state of the reloadable fields in Raft's
// configuration. This is useful for programs to discover the current state for
// reporting to users or tests. It is safe to call from any goroutine. It is
// intended for reporting and testing purposes primarily; external
// synchronization would be required to safely use this in a read-modify-write
// pattern for reloadable configuration options.
func (r *Raft) ReloadableConfig() ReloadableConfig {
cfg := r.config()
var rc ReloadableConfig
rc.fromConfig(cfg)
return rc
}
// BootstrapCluster is equivalent to non-member BootstrapCluster but can be
// called on an un-bootstrapped Raft instance after it has been created. This
// should only be called at the beginning of time for the cluster with an

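A short usage sketch of the `ReloadConfig`/`ReloadableConfig` pair added above, following the read-modify-write pattern the doc comment describes (callers provide their own synchronization if several goroutines may do this concurrently; values are illustrative):

```go
package example

import (
	"time"

	"github.com/hashicorp/raft"
)

// growTrailingLogs bumps only the reloadable fields on a running node. Fields
// not present in ReloadableConfig are left untouched by ReloadConfig, and an
// invalid combination is rejected without changing the instance.
func growTrailingLogs(r *raft.Raft) error {
	rc := r.ReloadableConfig() // current TrailingLogs / Snapshot* values
	rc.TrailingLogs = 50000    // retain more logs so a slow follower can catch up
	rc.SnapshotInterval = 2 * time.Minute
	return r.ReloadConfig(rc)
}
```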

@ -23,7 +23,7 @@ type commitment struct {
startIndex uint64
}
// newCommitment returns an commitment struct that notifies the provided
// newCommitment returns a commitment struct that notifies the provided
// channel when log entries have been committed. A new commitment struct is
// created each time this server becomes leader for a particular term.
// 'configuration' is the servers in the cluster.


@ -151,25 +151,36 @@ type Config struct {
// an inconsistent log.
MaxAppendEntries int
// BatchApplyCh indicates whether we should buffer applyCh
// to size MaxAppendEntries. This enables batch log commitment,
// but breaks the timeout guarantee on Apply. Specifically,
// a log can be added to the applyCh buffer but not actually be
// processed until after the specified timeout.
BatchApplyCh bool
// If we are a member of a cluster, and RemovePeer is invoked for the
// local node, then we forget all peers and transition into the follower state.
// If ShutdownOnRemove is is set, we additional shutdown Raft. Otherwise,
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
// we can become a leader of a cluster containing only this node.
ShutdownOnRemove bool
// TrailingLogs controls how many logs we leave after a snapshot. This is
// used so that we can quickly replay logs on a follower instead of being
// forced to send an entire snapshot.
// TrailingLogs controls how many logs we leave after a snapshot. This is used
// so that we can quickly replay logs on a follower instead of being forced to
// send an entire snapshot. The value passed here is the initial setting used.
// This can be tuned during operation using ReloadConfig.
TrailingLogs uint64
// SnapshotInterval controls how often we check if we should perform a snapshot.
// We randomly stagger between this value and 2x this value to avoid the entire
// cluster from performing a snapshot at once.
// SnapshotInterval controls how often we check if we should perform a
// snapshot. We randomly stagger between this value and 2x this value to
// prevent the entire cluster from performing a snapshot at once. The value passed
// here is the initial setting used. This can be tuned during operation using
// ReloadConfig.
SnapshotInterval time.Duration
// SnapshotThreshold controls how many outstanding logs there must be before
// we perform a snapshot. This is to prevent excessive snapshots when we can
// just replay a small set of logs.
// we perform a snapshot. This is to prevent excessive snapshotting by
// replaying a small set of logs instead. The value passed here is the initial
// setting used. This can be tuned during operation using ReloadConfig.
SnapshotThreshold uint64
// LeaderLeaseTimeout is used to control how long the "lease" lasts
@ -178,7 +189,7 @@ type Config struct {
// step down as leader.
LeaderLeaseTimeout time.Duration
// The unique ID for this server across all time. When running with
// LocalID is a unique ID for this server across all time. When running with
// ProtocolVersion < 3, you must set this to be the same as the network
// address of your transport.
LocalID ServerID
@ -192,25 +203,65 @@ type Config struct {
// Defaults to os.Stderr.
LogOutput io.Writer
// LogLevel represents a log level. If a no matching string is specified,
// hclog.NoLevel is assumed.
// LogLevel represents a log level. If the value does not match a known
// logging level hclog.NoLevel is used.
LogLevel string
// Logger is a user-provided hc-log logger. If nil, a logger writing to
// Logger is a user-provided logger. If nil, a logger writing to
// LogOutput with LogLevel is used.
Logger hclog.Logger
// NoSnapshotRestoreOnStart controls if raft will restore a snapshot to the
// FSM on start. This is useful if your FSM recovers from other mechanisms
// than raft snapshotting. Snapshot metadata will still be used to initialize
// raft's configuration and index values. This is used in NewRaft and
// RestoreCluster.
// raft's configuration and index values.
NoSnapshotRestoreOnStart bool
// skipStartup allows NewRaft() to bypass all background work goroutines
skipStartup bool
}
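As a side note, the new `BatchApplyCh` option documented above pairs with `MaxAppendEntries`; a minimal, illustrative sketch of enabling it (the ID and sizes are placeholders):

```go
package example

import "github.com/hashicorp/raft"

// newBatchingConfig enables the buffered applyCh: logs are batched up to
// MaxAppendEntries per commit, at the cost of the strict timeout guarantee
// on Apply described in the field comment above.
func newBatchingConfig(id raft.ServerID) *raft.Config {
	conf := raft.DefaultConfig()
	conf.LocalID = id
	conf.MaxAppendEntries = 64 // also the applyCh buffer size when batching
	conf.BatchApplyCh = true
	return conf
}
```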
// ReloadableConfig is the subset of Config that may be reconfigured during
// runtime using raft.ReloadConfig. We choose to duplicate fields over embedding