Update serf to pick up clean leave fix
This commit is contained in:
parent
705b20d9bc
commit
98a04a0af9
|
@ -2,6 +2,7 @@ package lib
|
|||
|
||||
import (
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SerfDefaultConfig returns a Consul-flavored Serf default configuration,
|
||||
|
@ -16,5 +17,12 @@ func SerfDefaultConfig() *serf.Config {
|
|||
// cluster size.
|
||||
base.MinQueueDepth = 4096
|
||||
|
||||
// This gives leaves some time to propagate through the cluster before
|
||||
// we shut down. The value was chosen to be reasonably short, but to
|
||||
// allow a leave to get to over 99.99% of the cluster with 100k nodes
|
||||
// (using https://www.serf.io/docs/internals/simulator.html).
|
||||
base.LeavePropagateDelay = 3 * time.Second
|
||||
|
||||
|
||||
return base
|
||||
}
|
||||
|
|
|
@ -55,6 +55,13 @@ type Config struct {
|
|||
// set, a timeout of 5 seconds will be set.
|
||||
BroadcastTimeout time.Duration
|
||||
|
||||
// LeavePropagateDelay is how long we wait for our leave (node dead) message to propagate
|
||||
// through the cluster. In particular, we want to stay up long enough to
|
||||
// service any probes from other nodes before they learn about us
|
||||
// leaving and stop probing. Otherwise, we risk getting node failures as
|
||||
// we leave.
|
||||
LeavePropagateDelay time.Duration
|
||||
|
||||
// The settings below relate to Serf's event coalescence feature. Serf
|
||||
// is able to coalesce multiple events into single events in order to
|
||||
// reduce the amount of noise that is sent along the EventCh. For example
|
||||
|
@ -255,6 +262,7 @@ func DefaultConfig() *Config {
|
|||
return &Config{
|
||||
NodeName: hostname,
|
||||
BroadcastTimeout: 5 * time.Second,
|
||||
LeavePropagateDelay: 1 * time.Second,
|
||||
EventBuffer: 512,
|
||||
QueryBuffer: 512,
|
||||
LogOutput: os.Stderr,
|
||||
|
|
|
@ -223,13 +223,16 @@ func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
|
|||
d.serf.queryClock.Witness(pp.QueryLTime - 1)
|
||||
}
|
||||
|
||||
// Process the left nodes first to avoid the LTimes from being increment
|
||||
// in the wrong order
|
||||
// Process the left nodes first to avoid the LTimes from incrementing
|
||||
// in the wrong order. Note that we don't have the actual Lamport time
|
||||
// for the leave message, so we go one past the join time, since the
|
||||
// leave must have been accepted after that to get onto the left members
|
||||
// list. If we didn't do this then the message would not get processed.
|
||||
leftMap := make(map[string]struct{}, len(pp.LeftMembers))
|
||||
leave := messageLeave{}
|
||||
for _, name := range pp.LeftMembers {
|
||||
leftMap[name] = struct{}{}
|
||||
leave.LTime = pp.StatusLTimes[name]
|
||||
leave.LTime = pp.StatusLTimes[name] + 1
|
||||
leave.Node = name
|
||||
d.serf.handleNodeLeaveIntent(&leave)
|
||||
}
|
||||
|
|
|
@ -691,6 +691,13 @@ func (s *Serf) Leave() error {
|
|||
return err
|
||||
}
|
||||
|
||||
// Wait for the leave to propagate through the cluster. The broadcast
|
||||
// timeout is how long we wait for the message to go out from our own
|
||||
// queue, but this wait is for that message to propagate through the
|
||||
// cluster. In particular, we want to stay up long enough to service
|
||||
// any probes from other nodes before they learn about us leaving.
|
||||
time.Sleep(s.config.LeavePropagateDelay)
|
||||
|
||||
// Transition to Left only if we are not already shut down
|
||||
s.stateLock.Lock()
|
||||
if s.state != SerfShutdown {
|
||||
|
@ -1670,12 +1677,17 @@ func (s *Serf) Stats() map[string]string {
|
|||
return strconv.FormatUint(v, 10)
|
||||
}
|
||||
s.memberLock.RLock()
|
||||
defer s.memberLock.RUnlock()
|
||||
members := toString(uint64(len(s.members)))
|
||||
failed := toString(uint64(len(s.failedMembers)))
|
||||
left := toString(uint64(len(s.leftMembers)))
|
||||
health_score := toString(uint64(s.memberlist.GetHealthScore()))
|
||||
|
||||
s.memberLock.RUnlock()
|
||||
stats := map[string]string{
|
||||
"members": toString(uint64(len(s.members))),
|
||||
"failed": toString(uint64(len(s.failedMembers))),
|
||||
"left": toString(uint64(len(s.leftMembers))),
|
||||
"health_score": toString(uint64(s.memberlist.GetHealthScore())),
|
||||
"members": members,
|
||||
"failed": failed,
|
||||
"left": left,
|
||||
"health_score": health_score,
|
||||
"member_time": toString(uint64(s.clock.Time())),
|
||||
"event_time": toString(uint64(s.eventClock.Time())),
|
||||
"query_time": toString(uint64(s.queryClock.Time())),
|
||||
|
|
|
@ -72,8 +72,8 @@
|
|||
{"path":"github.com/hashicorp/net-rpc-msgpackrpc","checksumSHA1":"qnlqWJYV81ENr61SZk9c65R1mDo=","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3","revisionTime":"2015-11-16T02:03:38Z"},
|
||||
{"path":"github.com/hashicorp/raft","checksumSHA1":"JjJtGJi1ywWhVhs/PvTXxe4TeD8=","revision":"6d14f0c70869faabd9e60ba7ed88a6cbbd6a661f","revisionTime":"2017-10-03T22:09:13Z","version":"v1.0.0","versionExact":"v1.0.0"},
|
||||
{"path":"github.com/hashicorp/raft-boltdb","checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"},
|
||||
{"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"0PeWsO2aI+2PgVYlYlDPKfzCLEQ=","comment":"v0.7.0-66-g6c4672d","revision":"b6017ae61f4420ed0c02d5eeeb9ff3fc02953f14","revisionTime":"2018-01-19T22:43:00Z"},
|
||||
{"path":"github.com/hashicorp/serf/serf","checksumSHA1":"QGImnWfhk0ILLZszcf3vRs/Ft7g=","comment":"v0.7.0-66-g6c4672d","revision":"b6017ae61f4420ed0c02d5eeeb9ff3fc02953f14","revisionTime":"2018-01-19T22:43:00Z"},
|
||||
{"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"0PeWsO2aI+2PgVYlYlDPKfzCLEQ=","revision":"4b67f2c2b2bb5b748d934a6d48221062e43d2274","revisionTime":"2018-05-04T20:06:40Z"},
|
||||
{"path":"github.com/hashicorp/serf/serf","checksumSHA1":"QrT+nzyXsD/MmhTjjhcPdnALZ1I=","revision":"4b67f2c2b2bb5b748d934a6d48221062e43d2274","revisionTime":"2018-05-04T20:06:40Z"},
|
||||
{"path":"github.com/hashicorp/yamux","checksumSHA1":"NnWv17i1tpvBNJtpdRRWpE6j4LY=","revision":"2658be15c5f05e76244154714161f17e3e77de2e","revisionTime":"2018-03-14T20:07:45Z"},
|
||||
{"path":"github.com/mattn/go-isatty","checksumSHA1":"xZuhljnmBysJPta/lMyYmJdujCg=","revision":"66b8e73f3f5cda9f96b69efd03dd3d7fc4a5cdb8","revisionTime":"2016-08-06T12:27:52Z"},
|
||||
{"path":"github.com/miekg/dns","checksumSHA1":"XTeOihCDhjG6ltUKExoJ2uEzShk=","revision":"5364553f1ee9cddc7ac8b62dce148309c386695b","revisionTime":"2018-01-25T10:38:03Z","version":"v1.0.4","versionExact":"v1.0.4"},
|
||||
|
@ -86,9 +86,9 @@
|
|||
{"path":"github.com/mitchellh/reflectwalk","checksumSHA1":"mrqMlK6gqe//WsJSrJ1HgkPM0lM=","revision":"eecf4c70c626c7cfbb95c90195bc34d386c74ac6","revisionTime":"2015-05-27T15:31:53Z"},
|
||||
{"path":"github.com/pascaldekloe/goe/verify","checksumSHA1":"5h+ERzHw3Rl2G0kFPxoJzxiA9s0=","revision":"07ebd1e2481f616a278ab431cf04cc5cf5ab3ebe","revisionTime":"2017-03-28T18:37:59Z"},
|
||||
{"path":"github.com/pkg/errors","checksumSHA1":"ynJSWoF6v+3zMnh9R0QmmG6iGV8=","revision":"ff09b135c25aae272398c51a07235b90a75aa4f0","revisionTime":"2017-03-16T20:15:38Z","tree":true},
|
||||
{"path": "github.com/prometheus/client_golang/prometheus/promhttp", "checksumSHA1": "BM771aKU6hC+5rap48aqvMXczII=", "revision": "f504d69affe11ec1ccb2e5948127f86878c9fd57", "revisionTime": "2018-03-28T13:04:30Z"},
|
||||
{"path":"github.com/pmezard/go-difflib/difflib","checksumSHA1":"LuFv4/jlrmFNnDb/5SCSEPAM9vU=","revision":"792786c7400a136282c1664665ae0a8db921c6c2","revisionTime":"2016-01-10T10:55:54Z"},
|
||||
{"path":"github.com/posener/complete","checksumSHA1":"Nt4Ol6ZM2n0XD5zatxjwEYBpQnw=","revision":"dc2bc5a81accba8782bebea28628224643a8286a","revisionTime":"2017-11-04T09:57:02Z","version":"=v1.1","versionExact":"v1.1"},
|
||||
{"path":"github.com/prometheus/client_golang/prometheus/promhttp","checksumSHA1":"BM771aKU6hC+5rap48aqvMXczII=","revision":"f504d69affe11ec1ccb2e5948127f86878c9fd57","revisionTime":"2018-03-28T13:04:30Z"},
|
||||
{"path":"github.com/ryanuber/columnize","checksumSHA1":"ExnVEVNT8APpFTm26cUb5T09yR4=","comment":"v2.0.1-8-g983d3a5","revision":"9b3edd62028f107d7cabb19353292afd29311a4e","revisionTime":"2016-07-12T16:32:29Z"},
|
||||
{"path":"github.com/sean-/seed","checksumSHA1":"A/YUMbGg1LHIeK2+NLZBt+MIAao=","revision":"3c72d44db0c567f7c901f9c5da5fe68392227750","revisionTime":"2017-02-08T16:47:21Z"},
|
||||
{"path":"github.com/sergi/go-diff/diffmatchpatch","checksumSHA1":"v7C+aJ1D/z3MEeCte6bxvpoGjM4=","revision":"feef008d51ad2b3778f85d387ccf91735543008d","revisionTime":"2017-04-09T07:17:39Z"},
|
||||
|
|
Loading…
Reference in New Issue