Make Raft trailing logs and snapshot timing reloadable (#10129)
* WIP reloadable raft config * Pre-define new raft gauges * Update go-metrics to change gauge reset behaviour * Update raft to pull in new metric and reloadable config * Add snapshot persistence timing and installSnapshot to our 'protected' list as they can be infrequent but are important * Update telemetry docs * Update config and telemetry docs * Add note to oldestLogAge on when it is visible * Add changelog entry * Update website/content/docs/agent/options.mdx Co-authored-by: Matt Keeler <mkeeler@users.noreply.github.com> Co-authored-by: Matt Keeler <mkeeler@users.noreply.github.com>
This commit is contained in:
parent
eb84a856c4
commit
d47eea3a3f
|
@ -0,0 +1,4 @@
|
|||
```release-note:improvement
|
||||
raft: allow reloading of raft trailing logs and snapshot timing to allow recovery from some [replication failure modes](https://github.com/hashicorp/consul/issues/9609).
|
||||
telemetry: add metrics and documentation for [monitoring for replication issues](https://consul.io/docs/agent/telemetry#raft-replication-capacity-issues).
|
||||
```
|
|
@ -3639,10 +3639,13 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error {
|
|||
}
|
||||
|
||||
cc := consul.ReloadableConfig{
|
||||
RPCRateLimit: newCfg.RPCRateLimit,
|
||||
RPCMaxBurst: newCfg.RPCMaxBurst,
|
||||
RPCMaxConnsPerClient: newCfg.RPCMaxConnsPerClient,
|
||||
ConfigEntryBootstrap: newCfg.ConfigEntryBootstrap,
|
||||
RPCRateLimit: newCfg.RPCRateLimit,
|
||||
RPCMaxBurst: newCfg.RPCMaxBurst,
|
||||
RPCMaxConnsPerClient: newCfg.RPCMaxConnsPerClient,
|
||||
ConfigEntryBootstrap: newCfg.ConfigEntryBootstrap,
|
||||
RaftSnapshotThreshold: newCfg.RaftSnapshotThreshold,
|
||||
RaftSnapshotInterval: newCfg.RaftSnapshotInterval,
|
||||
RaftTrailingLogs: newCfg.RaftTrailingLogs,
|
||||
}
|
||||
if err := a.delegate.ReloadConfig(cc); err != nil {
|
||||
return err
|
||||
|
|
|
@ -659,8 +659,11 @@ type RPCConfig struct {
|
|||
// ReloadableConfig is the configuration that is passed to ReloadConfig when
|
||||
// application config is reloaded.
|
||||
type ReloadableConfig struct {
|
||||
RPCRateLimit rate.Limit
|
||||
RPCMaxBurst int
|
||||
RPCMaxConnsPerClient int
|
||||
ConfigEntryBootstrap []structs.ConfigEntry
|
||||
RPCRateLimit rate.Limit
|
||||
RPCMaxBurst int
|
||||
RPCMaxConnsPerClient int
|
||||
ConfigEntryBootstrap []structs.ConfigEntry
|
||||
RaftSnapshotThreshold int
|
||||
RaftSnapshotInterval time.Duration
|
||||
RaftTrailingLogs int
|
||||
}
|
||||
|
|
|
@ -1387,6 +1387,13 @@ func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
|
|||
// ReloadConfig is used to have the Server do an online reload of
|
||||
// relevant configuration information
|
||||
func (s *Server) ReloadConfig(config ReloadableConfig) error {
|
||||
// Reload raft config first before updating any other state since it could
|
||||
// error if the new config is invalid.
|
||||
raftCfg := computeRaftReloadableConfig(config)
|
||||
if err := s.raft.ReloadConfig(raftCfg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.rpcLimiter.Store(rate.NewLimiter(config.RPCRateLimit, config.RPCMaxBurst))
|
||||
s.rpcConnLimiter.SetConfig(connlimit.Config{
|
||||
MaxConnsPerClientIP: config.RPCMaxConnsPerClient,
|
||||
|
@ -1401,6 +1408,33 @@ func (s *Server) ReloadConfig(config ReloadableConfig) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// computeRaftReloadableConfig works out the correct reloadable config for raft.
|
||||
// We reload raft even if nothing has changed since it's cheap and simpler than
|
||||
// trying to work out if it's different from the current raft config. This
|
||||
// function is separate to make it cheap to table test thoroughly without a full
|
||||
// raft instance.
|
||||
func computeRaftReloadableConfig(config ReloadableConfig) raft.ReloadableConfig {
|
||||
// We use the raw defaults _not_ the current values so that you can reload
|
||||
// back to a zero value having previously started Consul with a custom value
|
||||
// for one of these fields.
|
||||
defaultConf := DefaultConfig()
|
||||
raftCfg := raft.ReloadableConfig{
|
||||
TrailingLogs: defaultConf.RaftConfig.TrailingLogs,
|
||||
SnapshotInterval: defaultConf.RaftConfig.SnapshotInterval,
|
||||
SnapshotThreshold: defaultConf.RaftConfig.SnapshotThreshold,
|
||||
}
|
||||
if config.RaftSnapshotThreshold != 0 {
|
||||
raftCfg.SnapshotThreshold = uint64(config.RaftSnapshotThreshold)
|
||||
}
|
||||
if config.RaftSnapshotInterval != 0 {
|
||||
raftCfg.SnapshotInterval = config.RaftSnapshotInterval
|
||||
}
|
||||
if config.RaftTrailingLogs != 0 {
|
||||
raftCfg.TrailingLogs = uint64(config.RaftTrailingLogs)
|
||||
}
|
||||
return raftCfg
|
||||
}
|
||||
|
||||
// Atomically sets a readiness state flag when leadership is obtained, to indicate that server is past its barrier write
|
||||
func (s *Server) setConsistentReadReady() {
|
||||
atomic.StoreInt32(&s.readyForConsistentReads, 1)
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
|
||||
"github.com/google/tcpproxy"
|
||||
"github.com/hashicorp/memberlist"
|
||||
"github.com/hashicorp/raft"
|
||||
|
||||
"github.com/hashicorp/consul/agent/connect/ca"
|
||||
"github.com/hashicorp/consul/ipaddr"
|
||||
|
@ -1466,6 +1467,9 @@ func TestServer_ReloadConfig(t *testing.T) {
|
|||
c.Build = "1.5.0"
|
||||
c.RPCRateLimit = 500
|
||||
c.RPCMaxBurst = 5000
|
||||
// Set one raft param to be non-default in the initial config, others are
|
||||
// default.
|
||||
c.RaftConfig.TrailingLogs = 1234
|
||||
})
|
||||
defer os.RemoveAll(dir1)
|
||||
defer s.Shutdown()
|
||||
|
@ -1480,6 +1484,14 @@ func TestServer_ReloadConfig(t *testing.T) {
|
|||
RPCRateLimit: 1000,
|
||||
RPCMaxBurst: 10000,
|
||||
ConfigEntryBootstrap: []structs.ConfigEntry{entryInit},
|
||||
// Reset the custom one to default by removing it from config file (it will
|
||||
// be a zero value here).
|
||||
RaftTrailingLogs: 0,
|
||||
|
||||
// Set a different Raft param to something custom now
|
||||
RaftSnapshotThreshold: 4321,
|
||||
|
||||
// Leave other raft fields default
|
||||
}
|
||||
require.NoError(t, s.ReloadConfig(rc))
|
||||
|
||||
|
@ -1496,6 +1508,98 @@ func TestServer_ReloadConfig(t *testing.T) {
|
|||
limiter = s.rpcLimiter.Load().(*rate.Limiter)
|
||||
require.Equal(t, rate.Limit(1000), limiter.Limit())
|
||||
require.Equal(t, 10000, limiter.Burst())
|
||||
|
||||
// Check raft config
|
||||
defaults := DefaultConfig()
|
||||
got := s.raft.ReloadableConfig()
|
||||
require.Equal(t, uint64(4321), got.SnapshotThreshold,
|
||||
"should have be reloaded to new value")
|
||||
require.Equal(t, defaults.RaftConfig.SnapshotInterval, got.SnapshotInterval,
|
||||
"should have remained the default interval")
|
||||
require.Equal(t, defaults.RaftConfig.TrailingLogs, got.TrailingLogs,
|
||||
"should have reloaded to default trailing_logs")
|
||||
|
||||
// Now check that updating each of those raft fields separately works correctly
|
||||
// too.
|
||||
}
|
||||
|
||||
func TestServer_computeRaftReloadableConfig(t *testing.T) {
|
||||
|
||||
defaults := DefaultConfig().RaftConfig
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
rc ReloadableConfig
|
||||
want raft.ReloadableConfig
|
||||
}{
|
||||
{
|
||||
// This case is the common path - reload is called with a ReloadableConfig
|
||||
// populated from the RuntimeConfig which has zero values for the fields.
|
||||
// On startup we selectively pick non-zero runtime config fields to
|
||||
// override defaults so we need to do the same.
|
||||
name: "Still defaults",
|
||||
rc: ReloadableConfig{},
|
||||
want: raft.ReloadableConfig{
|
||||
SnapshotThreshold: defaults.SnapshotThreshold,
|
||||
SnapshotInterval: defaults.SnapshotInterval,
|
||||
TrailingLogs: defaults.TrailingLogs,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Threshold set",
|
||||
rc: ReloadableConfig{
|
||||
RaftSnapshotThreshold: 123456,
|
||||
},
|
||||
want: raft.ReloadableConfig{
|
||||
SnapshotThreshold: 123456,
|
||||
SnapshotInterval: defaults.SnapshotInterval,
|
||||
TrailingLogs: defaults.TrailingLogs,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "interval set",
|
||||
rc: ReloadableConfig{
|
||||
RaftSnapshotInterval: 13 * time.Minute,
|
||||
},
|
||||
want: raft.ReloadableConfig{
|
||||
SnapshotThreshold: defaults.SnapshotThreshold,
|
||||
SnapshotInterval: 13 * time.Minute,
|
||||
TrailingLogs: defaults.TrailingLogs,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "trailing logs set",
|
||||
rc: ReloadableConfig{
|
||||
RaftTrailingLogs: 78910,
|
||||
},
|
||||
want: raft.ReloadableConfig{
|
||||
SnapshotThreshold: defaults.SnapshotThreshold,
|
||||
SnapshotInterval: defaults.SnapshotInterval,
|
||||
TrailingLogs: 78910,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "all set",
|
||||
rc: ReloadableConfig{
|
||||
RaftSnapshotThreshold: 123456,
|
||||
RaftSnapshotInterval: 13 * time.Minute,
|
||||
RaftTrailingLogs: 78910,
|
||||
},
|
||||
want: raft.ReloadableConfig{
|
||||
SnapshotThreshold: 123456,
|
||||
SnapshotInterval: 13 * time.Minute,
|
||||
TrailingLogs: 78910,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := computeRaftReloadableConfig(tc.rc)
|
||||
require.Equal(t, tc.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_RPC_RateLimit(t *testing.T) {
|
||||
|
|
|
@ -175,6 +175,19 @@ func registerWithGRPC(b grpcresolver.Builder) {
|
|||
// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends
|
||||
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics.
|
||||
func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) {
|
||||
// TODO: "raft..." metrics come from the raft lib and we should migrate these to a telemetry
|
||||
// package within. In the meantime, we're going to define a few here because they're key to monitoring Consul.
|
||||
raftGauges := []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"raft", "fsm", "lastRestoreDuration"},
|
||||
Help: "This measures how long the last FSM restore (from disk or leader) took.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "leader", "oldestLogAge"},
|
||||
Help: "This measures how old the oldest log in the leader's log store is.",
|
||||
},
|
||||
}
|
||||
|
||||
// Build slice of slices for all gauge definitions
|
||||
var gauges = [][]prometheus.GaugeDefinition{
|
||||
cache.Gauges,
|
||||
|
@ -185,7 +198,9 @@ func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, [
|
|||
usagemetrics.Gauges,
|
||||
consul.ReplicationGauges,
|
||||
Gauges,
|
||||
raftGauges,
|
||||
}
|
||||
|
||||
// Flatten definitions
|
||||
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
|
||||
var gaugeDefs []prometheus.GaugeDefinition
|
||||
|
@ -252,6 +267,14 @@ func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, [
|
|||
Name: []string{"raft", "leader", "lastContact"},
|
||||
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "snapshot", "persist"},
|
||||
Help: "Measures the time it takes raft to write a new snapshot to disk.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "rpc", "installSnapshot"},
|
||||
Help: "Measures the time it takes the raft leader to install a snapshot on a follower that is catching up after being down or has just joined the cluster.",
|
||||
},
|
||||
}
|
||||
|
||||
var summaries = [][]prometheus.SummaryDefinition{
|
||||
|
|
4
go.mod
4
go.mod
|
@ -12,7 +12,7 @@ require (
|
|||
github.com/Microsoft/go-winio v0.4.3 // indirect
|
||||
github.com/NYTimes/gziphandler v1.0.1
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e
|
||||
github.com/armon/go-metrics v0.3.6
|
||||
github.com/armon/go-metrics v0.3.7
|
||||
github.com/armon/go-radix v1.0.0
|
||||
github.com/aws/aws-sdk-go v1.25.41
|
||||
github.com/coredns/coredns v1.1.2
|
||||
|
@ -52,7 +52,7 @@ require (
|
|||
github.com/hashicorp/mdns v1.0.4 // indirect
|
||||
github.com/hashicorp/memberlist v0.2.3
|
||||
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69
|
||||
github.com/hashicorp/raft v1.2.0
|
||||
github.com/hashicorp/raft v1.3.0
|
||||
github.com/hashicorp/raft-autopilot v0.1.2
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea
|
||||
github.com/hashicorp/serf v0.9.5
|
||||
|
|
7
go.sum
7
go.sum
|
@ -58,8 +58,8 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5
|
|||
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
|
||||
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
|
||||
github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs=
|
||||
github.com/armon/go-metrics v0.3.6 h1:x/tmtOF9cDBoXH7XoAGOz2qqm1DknFD1590XmD/DUJ8=
|
||||
github.com/armon/go-metrics v0.3.6/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
|
||||
github.com/armon/go-metrics v0.3.7 h1:c/oCtWzYpboy6+6f6LjXRlyW7NwA2SWf+a9KMlHq/bM=
|
||||
github.com/armon/go-metrics v0.3.7/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
|
||||
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
|
||||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
|
@ -279,8 +279,9 @@ github.com/hashicorp/memberlist v0.2.3/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
|||
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69 h1:lc3c72qGlIMDqQpQH82Y4vaglRMMFdJbziYWriR4UcE=
|
||||
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69/go.mod h1:/z+jUGRBlwVpUZfjute9jWaF6/HuhjuFQuL1YXzVD1Q=
|
||||
github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
|
||||
github.com/hashicorp/raft v1.2.0 h1:mHzHIrF0S91d3A7RPBvuqkgB4d/7oFJZyvf1Q4m7GA0=
|
||||
github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
|
||||
github.com/hashicorp/raft v1.3.0 h1:Wox4J4R7J2FOJLtTa6hdk0VJfiNUSP32pYoYR738bkE=
|
||||
github.com/hashicorp/raft v1.3.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM=
|
||||
github.com/hashicorp/raft-autopilot v0.1.2 h1:yeqdUjWLjVJkBM+mcVxqwxi+w+aHsb9cEON2dz69OCs=
|
||||
github.com/hashicorp/raft-autopilot v0.1.2/go.mod h1:Af4jZBwaNOI+tXfIqIdbcAnh/UyyqIMj/pOISIfhArw=
|
||||
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBAS2J0VBzVa5e80b5ZtYuNQtgXjN40qBZlD4=
|
||||
|
|
|
@ -5,7 +5,6 @@ package prometheus
|
|||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -31,17 +30,16 @@ type PrometheusOpts struct {
|
|||
Expiration time.Duration
|
||||
Registerer prometheus.Registerer
|
||||
|
||||
// Gauges, Summaries, and Counters allow us to pre-declare metrics by giving their Name, Help, and ConstLabels to
|
||||
// the PrometheusSink when it is created. Metrics declared in this way will be initialized at zero and will not be
|
||||
// deleted when their expiry is reached.
|
||||
// - Gauges and Summaries will be set to NaN when they expire.
|
||||
// - Counters continue to Collect their last known value.
|
||||
// Ex:
|
||||
// PrometheusOpts{
|
||||
// Gauges, Summaries, and Counters allow us to pre-declare metrics by giving
|
||||
// their Name, Help, and ConstLabels to the PrometheusSink when it is created.
|
||||
// Metrics declared in this way will be initialized at zero and will not be
|
||||
// deleted or altered when their expiry is reached.
|
||||
//
|
||||
// Ex: PrometheusOpts{
|
||||
// Expiration: 10 * time.Second,
|
||||
// Gauges: []GaugeDefinition{
|
||||
// {
|
||||
// Name: []string{ "application", "component", "measurement"},
|
||||
// Name: []string{ "application", "component", "measurement"},
|
||||
// Help: "application_component_measurement provides an example of how to declare static metrics",
|
||||
// ConstLabels: []metrics.Label{ { Name: "my_label", Value: "does_not_change" }, },
|
||||
// },
|
||||
|
@ -139,21 +137,24 @@ func (p *PrometheusSink) Describe(c chan<- *prometheus.Desc) {
|
|||
// logic to clean up ephemeral metrics if their value haven't been set for a
|
||||
// duration exceeding our allowed expiration time.
|
||||
func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
|
||||
p.collectAtTime(c, time.Now())
|
||||
}
|
||||
|
||||
// collectAtTime allows internal testing of the expiry based logic here without
|
||||
// mocking clocks or making tests timing sensitive.
|
||||
func (p *PrometheusSink) collectAtTime(c chan<- prometheus.Metric, t time.Time) {
|
||||
expire := p.expiration != 0
|
||||
now := time.Now()
|
||||
p.gauges.Range(func(k, v interface{}) bool {
|
||||
if v == nil {
|
||||
return true
|
||||
}
|
||||
g := v.(*gauge)
|
||||
lastUpdate := g.updatedAt
|
||||
if expire && lastUpdate.Add(p.expiration).Before(now) {
|
||||
if expire && lastUpdate.Add(p.expiration).Before(t) {
|
||||
if g.canDelete {
|
||||
p.gauges.Delete(k)
|
||||
return true
|
||||
}
|
||||
// We have not observed the gauge this interval so we don't know its value.
|
||||
g.Set(math.NaN())
|
||||
}
|
||||
g.Collect(c)
|
||||
return true
|
||||
|
@ -164,13 +165,11 @@ func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
|
|||
}
|
||||
s := v.(*summary)
|
||||
lastUpdate := s.updatedAt
|
||||
if expire && lastUpdate.Add(p.expiration).Before(now) {
|
||||
if expire && lastUpdate.Add(p.expiration).Before(t) {
|
||||
if s.canDelete {
|
||||
p.summaries.Delete(k)
|
||||
return true
|
||||
}
|
||||
// We have observed nothing in this interval.
|
||||
s.Observe(math.NaN())
|
||||
}
|
||||
s.Collect(c)
|
||||
return true
|
||||
|
@ -181,12 +180,11 @@ func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) {
|
|||
}
|
||||
count := v.(*counter)
|
||||
lastUpdate := count.updatedAt
|
||||
if expire && lastUpdate.Add(p.expiration).Before(now) {
|
||||
if expire && lastUpdate.Add(p.expiration).Before(t) {
|
||||
if count.canDelete {
|
||||
p.counters.Delete(k)
|
||||
return true
|
||||
}
|
||||
// Counters remain at their previous value when not observed, so we do not set it to NaN.
|
||||
}
|
||||
count.Collect(c)
|
||||
return true
|
||||
|
|
|
@ -1,5 +1,21 @@
|
|||
# UNRELEASED
|
||||
|
||||
# 1.3.0 (April 22nd, 2021)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
* Added metrics for `oldestLogAge` and `lastRestoreDuration` to monitor capacity issues that can cause unrecoverable cluster failure [[GH-452](https://github.com/hashicorp/raft/pull/452)][[GH-454](https://github.com/hashicorp/raft/pull/454/files)]
|
||||
* Made `TrailingLogs`, `SnapshotInterval` and `SnapshotThreshold` reloadable at runtime using a new `ReloadConfig` method. This allows recovery from cases where there are not enough logs retained for followers to catchup after a restart. [[GH-444](https://github.com/hashicorp/raft/pull/444)]
|
||||
* Inclusify the repository by switching to main [[GH-446](https://github.com/hashicorp/raft/pull/446)]
|
||||
* Add option for a buffered `ApplyCh` if `MaxAppendEntries` is enabled [[GH-445](https://github.com/hashicorp/raft/pull/445)]
|
||||
* Add string to `LogType` for more human readable debugging [[GH-442](https://github.com/hashicorp/raft/pull/442)]
|
||||
* Extract fuzzy testing into its own module [[GH-459](https://github.com/hashicorp/raft/pull/459)]
|
||||
|
||||
BUG FIXES
|
||||
* Update LogCache `StoreLogs()` to capture an error that would previously cause a panic [[GH-460](https://github.com/hashicorp/raft/pull/460)]
|
||||
|
||||
# 1.2.0 (October 5th, 2020)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
* Remove `StartAsLeader` configuration option [[GH-364](https://github.com/hashicorp/raft/pull/386)]
|
||||
|
@ -85,4 +101,4 @@ v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. Th
|
|||
|
||||
# 0.1.0 (September 29th, 2017)
|
||||
|
||||
v0.1.0 is the original stable version of the library that was in master and has been maintained with no breaking API changes. This was in use by Consul prior to version 0.7.0.
|
||||
v0.1.0 is the original stable version of the library that was in main and has been maintained with no breaking API changes. This was in use by Consul prior to version 0.7.0.
|
||||
|
|
|
@ -16,28 +16,28 @@ endif
|
|||
TEST_RESULTS_DIR?=/tmp/test-results
|
||||
|
||||
test:
|
||||
GOTRACEBACK=all go test $(TESTARGS) -timeout=60s -race .
|
||||
GOTRACEBACK=all go test $(TESTARGS) -timeout=60s -tags batchtest -race .
|
||||
GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -race .
|
||||
GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -tags batchtest -race .
|
||||
|
||||
integ: test
|
||||
INTEG_TESTS=yes go test $(TESTARGS) -timeout=25s -run=Integ .
|
||||
INTEG_TESTS=yes go test $(TESTARGS) -timeout=25s -tags batchtest -run=Integ .
|
||||
INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -run=Integ .
|
||||
INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -tags batchtest -run=Integ .
|
||||
|
||||
ci.test-norace:
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -tags batchtest
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -tags batchtest
|
||||
|
||||
ci.test:
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -race .
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=60s -race -tags batchtest .
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race .
|
||||
gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-test.xml -- -timeout=180s -race -tags batchtest .
|
||||
|
||||
ci.integ: ci.test
|
||||
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=25s -run=Integ .
|
||||
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=25s -run=Integ -tags batchtest .
|
||||
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ .
|
||||
INTEG_TESTS=yes gotestsum --format=short-verbose --junitfile $(TEST_RESULTS_DIR)/gotestsum-report-integ.xml -- -timeout=60s -run=Integ -tags batchtest .
|
||||
|
||||
fuzz:
|
||||
go test $(TESTARGS) -timeout=20m ./fuzzy
|
||||
go test $(TESTARGS) -timeout=20m -tags batchtest ./fuzzy
|
||||
cd ./fuzzy && go test $(TESTARGS) -timeout=20m .
|
||||
cd ./fuzzy && go test $(TESTARGS) -timeout=20m -tags batchtest .
|
||||
|
||||
deps:
|
||||
go get -t -d -v ./...
|
||||
|
|
|
@ -28,16 +28,21 @@ To prevent complications with cgo, the primary backend `MDBStore` is in a separa
|
|||
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
|
||||
for the `LogStore` and `StableStore`.
|
||||
|
||||
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
|
||||
A pure Go backend using [Bbolt](https://github.com/etcd-io/bbolt) is also available called
|
||||
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
|
||||
and `StableStore`.
|
||||
|
||||
|
||||
## Community Contributed Examples
|
||||
[Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC
|
||||
|
||||
|
||||
## Tagged Releases
|
||||
|
||||
As of September 2017, HashiCorp will start using tags for this library to clearly indicate
|
||||
major version updates. We recommend you vendor your application's dependency on this library.
|
||||
|
||||
* v0.1.0 is the original stable version of the library that was in master and has been maintained
|
||||
* v0.1.0 is the original stable version of the library that was in main and has been maintained
|
||||
with no breaking API changes. This was in use by Consul prior to version 0.7.0.
|
||||
|
||||
* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version
|
||||
|
@ -104,4 +109,3 @@ greatly sacrificing performance.
|
|||
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
|
||||
committing a log entry requires a single round trip to half of the cluster.
|
||||
Thus performance is bound by disk I/O and network latency.
|
||||
|
||||
|
|
|
@ -81,8 +81,15 @@ type Raft struct {
|
|||
// be committed and applied to the FSM.
|
||||
applyCh chan *logFuture
|
||||
|
||||
// Configuration provided at Raft initialization
|
||||
conf Config
|
||||
// conf stores the current configuration to use. This is the most recent one
|
||||
// provided. All reads of config values should use the config() helper method
|
||||
// to read this safely.
|
||||
conf atomic.Value
|
||||
|
||||
// confReloadMu ensures that only one thread can reload config at once since
|
||||
// we need to read-modify-write the atomic. It is NOT necessary to hold this
|
||||
// for any other operation e.g. reading config using config().
|
||||
confReloadMu sync.Mutex
|
||||
|
||||
// FSM is the client state machine to apply commands to
|
||||
fsm FSM
|
||||
|
@ -199,7 +206,7 @@ type Raft struct {
|
|||
// server. Any further attempts to bootstrap will return an error that can be
|
||||
// safely ignored.
|
||||
//
|
||||
// One sane approach is to bootstrap a single server with a configuration
|
||||
// One approach is to bootstrap a single server with a configuration
|
||||
// listing just itself as a Voter, then invoke AddVoter() on it to add other
|
||||
// servers to the cluster.
|
||||
func BootstrapCluster(conf *Config, logs LogStore, stable StableStore,
|
||||
|
@ -316,6 +323,12 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore,
|
|||
continue
|
||||
}
|
||||
|
||||
// Note this is the one place we call fsm.Restore without the
|
||||
// fsmRestoreAndMeasure wrapper since this function should only be called to
|
||||
// reset state on disk and the FSM passed will not be used for a running
|
||||
// server instance. If the same process will eventually become a Raft peer
|
||||
// then it will call NewRaft and restore again from disk then which will
|
||||
// report metrics.
|
||||
err = fsm.Restore(source)
|
||||
// Close the source after the restore has completed
|
||||
source.Close()
|
||||
|
@ -385,9 +398,9 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore,
|
|||
return nil
|
||||
}
|
||||
|
||||
// GetConfiguration returns the configuration of the Raft cluster without
|
||||
// starting a Raft instance or connecting to the cluster
|
||||
// This function has identical behavior to Raft.GetConfiguration
|
||||
// GetConfiguration returns the persisted configuration of the Raft cluster
|
||||
// without starting a Raft instance or connecting to the cluster. This function
|
||||
// has identical behavior to Raft.GetConfiguration.
|
||||
func GetConfiguration(conf *Config, fsm FSM, logs LogStore, stable StableStore,
|
||||
snaps SnapshotStore, trans Transport) (Configuration, error) {
|
||||
conf.skipStartup = true
|
||||
|
@ -486,7 +499,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
|
|||
|
||||
// Make sure we have a valid server address and ID.
|
||||
protocolVersion := conf.ProtocolVersion
|
||||
localAddr := ServerAddress(trans.LocalAddr())
|
||||
localAddr := trans.LocalAddr()
|
||||
localID := conf.LocalID
|
||||
|
||||
// TODO (slackpad) - When we deprecate protocol version 2, remove this
|
||||
|
@ -495,11 +508,16 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
|
|||
return nil, fmt.Errorf("when running with ProtocolVersion < 3, LocalID must be set to the network address")
|
||||
}
|
||||
|
||||
// Buffer applyCh to MaxAppendEntries if the option is enabled
|
||||
applyCh := make(chan *logFuture)
|
||||
if conf.BatchApplyCh {
|
||||
applyCh = make(chan *logFuture, conf.MaxAppendEntries)
|
||||
}
|
||||
|
||||
// Create Raft struct.
|
||||
r := &Raft{
|
||||
protocolVersion: protocolVersion,
|
||||
applyCh: make(chan *logFuture),
|
||||
conf: *conf,
|
||||
applyCh: applyCh,
|
||||
fsm: fsm,
|
||||
fsmMutateCh: make(chan interface{}, 128),
|
||||
fsmSnapshotCh: make(chan *reqSnapshotFuture),
|
||||
|
@ -524,6 +542,8 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
|
|||
leadershipTransferCh: make(chan *leadershipTransferFuture, 1),
|
||||
}
|
||||
|
||||
r.conf.Store(*conf)
|
||||
|
||||
// Initialize as a follower.
|
||||
r.setState(Follower)
|
||||
|
||||
|
@ -577,23 +597,23 @@ func (r *Raft) restoreSnapshot() error {
|
|||
|
||||
// Try to load in order of newest to oldest
|
||||
for _, snapshot := range snapshots {
|
||||
if !r.conf.NoSnapshotRestoreOnStart {
|
||||
if !r.config().NoSnapshotRestoreOnStart {
|
||||
_, source, err := r.snapshots.Open(snapshot.ID)
|
||||
if err != nil {
|
||||
r.logger.Error("failed to open snapshot", "id", snapshot.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
err = r.fsm.Restore(source)
|
||||
// Close the source after the restore has completed
|
||||
source.Close()
|
||||
if err != nil {
|
||||
if err := fsmRestoreAndMeasure(r.fsm, source); err != nil {
|
||||
source.Close()
|
||||
r.logger.Error("failed to restore snapshot", "id", snapshot.ID, "error", err)
|
||||
continue
|
||||
}
|
||||
source.Close()
|
||||
|
||||
r.logger.Info("restored from snapshot", "id", snapshot.ID)
|
||||
}
|
||||
|
||||
// Update the lastApplied so we don't replay old logs
|
||||
r.setLastApplied(snapshot.Index)
|
||||
|
||||
|
@ -624,6 +644,45 @@ func (r *Raft) restoreSnapshot() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (r *Raft) config() Config {
|
||||
return r.conf.Load().(Config)
|
||||
}
|
||||
|
||||
// ReloadConfig updates the configuration of a running raft node. If the new
|
||||
// configuration is invalid an error is returned and no changes made to the
|
||||
// instance. All fields will be copied from rc into the new configuration, even
|
||||
// if they are zero valued.
|
||||
func (r *Raft) ReloadConfig(rc ReloadableConfig) error {
|
||||
r.confReloadMu.Lock()
|
||||
defer r.confReloadMu.Unlock()
|
||||
|
||||
// Load the current config (note we are under a lock so it can't be changed
|
||||
// between this read and a later Store).
|
||||
oldCfg := r.config()
|
||||
|
||||
// Set the reloadable fields
|
||||
newCfg := rc.apply(oldCfg)
|
||||
|
||||
if err := ValidateConfig(&newCfg); err != nil {
|
||||
return err
|
||||
}
|
||||
r.conf.Store(newCfg)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReloadableConfig returns the current state of the reloadable fields in Raft's
|
||||
// configuration. This is useful for programs to discover the current state for
|
||||
// reporting to users or tests. It is safe to call from any goroutine. It is
|
||||
// intended for reporting and testing purposes primarily; external
|
||||
// synchronization would be required to safely use this in a read-modify-write
|
||||
// pattern for reloadable configuration options.
|
||||
func (r *Raft) ReloadableConfig() ReloadableConfig {
|
||||
// Read the config once; fromConfig is expected to copy its reloadable
// fields into rc (its definition is not shown here).
cfg := r.config()
|
||||
var rc ReloadableConfig
|
||||
rc.fromConfig(cfg)
|
||||
return rc
|
||||
}
|
||||
|
||||
// BootstrapCluster is equivalent to non-member BootstrapCluster but can be
|
||||
// called on an un-bootstrapped Raft instance after it has been created. This
|
||||
// should only be called at the beginning of time for the cluster with an
|
||||
|
|
|
@ -23,7 +23,7 @@ type commitment struct {
|
|||
startIndex uint64
|
||||
}
|
||||
|
||||
// newCommitment returns an commitment struct that notifies the provided
|
||||
// newCommitment returns a commitment struct that notifies the provided
|
||||
// channel when log entries have been committed. A new commitment struct is
|
||||
// created each time this server becomes leader for a particular term.
|
||||
// 'configuration' is the servers in the cluster.
|
||||
|
|
|
@ -151,25 +151,36 @@ type Config struct {
|
|||
// an inconsistent log.
|
||||
MaxAppendEntries int
|
||||
|
||||
// BatchApplyCh indicates whether we should buffer applyCh
|
||||
// to size MaxAppendEntries. This enables batch log commitment,
|
||||
// but breaks the timeout guarantee on Apply. Specifically,
|
||||
// a log can be added to the applyCh buffer but not actually be
|
||||
// processed until after the specified timeout.
|
||||
BatchApplyCh bool
|
||||
|
||||
// If we are a member of a cluster, and RemovePeer is invoked for the
|
||||
// local node, then we forget all peers and transition into the follower state.
|
||||
// If ShutdownOnRemove is is set, we additional shutdown Raft. Otherwise,
|
||||
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
|
||||
// we can become a leader of a cluster containing only this node.
|
||||
ShutdownOnRemove bool
|
||||
|
||||
// TrailingLogs controls how many logs we leave after a snapshot. This is
|
||||
// used so that we can quickly replay logs on a follower instead of being
|
||||
// forced to send an entire snapshot.
|
||||
// TrailingLogs controls how many logs we leave after a snapshot. This is used
|
||||
// so that we can quickly replay logs on a follower instead of being forced to
|
||||
// send an entire snapshot. The value passed here is the initial setting used.
|
||||
// This can be tuned during operation using ReloadConfig.
|
||||
TrailingLogs uint64
|
||||
|
||||
// SnapshotInterval controls how often we check if we should perform a snapshot.
|
||||
// We randomly stagger between this value and 2x this value to avoid the entire
|
||||
// cluster from performing a snapshot at once.
|
||||
// SnapshotInterval controls how often we check if we should perform a
|
||||
// snapshot. We randomly stagger between this value and 2x this value to prevent
|
||||
// the entire cluster from performing a snapshot at once. The value passed
|
||||
// here is the initial setting used. This can be tuned during operation using
|
||||
// ReloadConfig.
|
||||
SnapshotInterval time.Duration
|
||||
|
||||
// SnapshotThreshold controls how many outstanding logs there must be before
|
||||
// we perform a snapshot. This is to prevent excessive snapshots when we can
|
||||
// just replay a small set of logs.
|
||||
// we perform a snapshot. This is to prevent excessive snapshotting by
|
||||
// replaying a small set of logs instead. The value passed here is the initial
|
||||
// setting used. This can be tuned during operation using ReloadConfig.
|
||||
SnapshotThreshold uint64
|
||||
|
||||
// LeaderLeaseTimeout is used to control how long the "lease" lasts
|
||||
|
@ -178,7 +189,7 @@ type Config struct {
|
|||
// step down as leader.
|
||||
LeaderLeaseTimeout time.Duration
|
||||
|
||||
// The unique ID for this server across all time. When running with
|
||||
// LocalID is a unique ID for this server across all time. When running with
|
||||
// ProtocolVersion < 3, you must set this to be the same as the network
|
||||
// address of your transport.
|
||||
LocalID ServerID
|
||||
|
@ -192,25 +203,65 @@ type Config struct {
|
|||
// Defaults to os.Stderr.
|
||||
LogOutput io.Writer
|
||||
|
||||
// LogLevel represents a log level. If a no matching string is specified,
|
||||
// hclog.NoLevel is assumed.
|
||||
// LogLevel represents a log level. If the value does not match a known
|
||||
// logging level hclog.NoLevel is used.
|
||||
LogLevel string
|
||||
|
||||
// Logger is a user-provided hc-log logger. If nil, a logger writing to
|
||||
// Logger is a user-provided logger. If nil, a logger writing to
|
||||
// LogOutput with LogLevel is used.
|
||||
Logger hclog.Logger
|
||||
|
||||
// NoSnapshotRestoreOnStart controls if raft will restore a snapshot to the
|
||||
// FSM on start. This is useful if your FSM recovers from other mechanisms
|
||||
// than raft snapshotting. Snapshot metadata will still be used to initialize
|
||||
// raft's configuration and index values. This is used in NewRaft and
|
||||
// RestoreCluster.
|
||||
// raft's configuration and index values.
|
||||
NoSnapshotRestoreOnStart bool
|
||||
|
||||
// skipStartup allows NewRaft() to bypass all background work goroutines
|
||||
skipStartup bool
|
||||
}
|
||||
|
||||
// ReloadableConfig is the subset of Config that may be reconfigured during
|
||||
// runtime using raft.ReloadConfig. We choose to duplicate fields over embedding
|
||||