Update serf to pick up graceful leave fix
commit ca5758741b
parent b8c071e0f9
@@ -1245,6 +1245,10 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (
 	}
 	conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
 	conf.RejoinAfterLeave = true
+	// LeavePropagateDelay is used to make sure broadcasted leave intents propagate
+	// This value was tuned using https://www.serf.io/docs/internals/simulator.html to
+	// allow for convergence in 99.9% of nodes in a 10 node cluster
+	conf.LeavePropagateDelay = 1 * time.Second
 	conf.Merge = &serfMergeDelegate{}
 
 	// Until Nomad supports this fully, we disable automatic resolution.
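For context, a minimal sketch of how an agent embedding Serf directly would opt into the same delay outside of Nomad; the node name is illustrative and the one-second value simply mirrors the tuning above:

    package main

    import (
    	"log"
    	"time"

    	"github.com/hashicorp/serf/serf"
    )

    func main() {
    	conf := serf.DefaultConfig()
    	conf.NodeName = "server-1" // illustrative name, not from this commit
    	conf.RejoinAfterLeave = true
    	// Mirror the Nomad tuning: one second is enough for a broadcast
    	// leave intent to reach ~99.9% of nodes in a 10-node cluster per
    	// the Serf simulator linked in the hunk above.
    	conf.LeavePropagateDelay = 1 * time.Second

    	s, err := serf.Create(conf)
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer s.Shutdown()
    }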
@@ -55,6 +55,13 @@ type Config struct {
 	// set, a timeout of 5 seconds will be set.
 	BroadcastTimeout time.Duration
 
+	// LeavePropagateDelay is for our leave (node dead) message to propagate
+	// through the cluster. In particular, we want to stay up long enough to
+	// service any probes from other nodes before they learn about us
+	// leaving and stop probing. Otherwise, we risk getting node failures as
+	// we leave.
+	LeavePropagateDelay time.Duration
+
 	// The settings below relate to Serf's event coalescence feature. Serf
 	// is able to coalesce multiple events into single events in order to
 	// reduce the amount of noise that is sent along the EventCh. For example
@@ -255,6 +262,7 @@ func DefaultConfig() *Config {
 	return &Config{
 		NodeName:            hostname,
 		BroadcastTimeout:    5 * time.Second,
+		LeavePropagateDelay: 1 * time.Second,
 		EventBuffer:         512,
 		QueryBuffer:         512,
 		LogOutput:           os.Stderr,
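With this default in place, any caller that builds on DefaultConfig inherits the one-second delay automatically. A hedged sketch of scaling it for bigger clusters; the 100-node threshold and the five-second value are assumptions for illustration only, not part of this commit:

    package main

    import (
    	"fmt"
    	"time"

    	"github.com/hashicorp/serf/serf"
    )

    // tunedConfig bumps LeavePropagateDelay for larger clusters. The 100-node
    // threshold and the 5s value are illustrative assumptions; re-run the
    // Serf simulator against your own cluster size to pick real numbers.
    func tunedConfig(clusterSize int) *serf.Config {
    	conf := serf.DefaultConfig()
    	// After this change, conf.LeavePropagateDelay starts at 1 * time.Second.
    	if clusterSize > 100 {
    		conf.LeavePropagateDelay = 5 * time.Second
    	}
    	return conf
    }

    func main() {
    	fmt.Println(tunedConfig(10).LeavePropagateDelay)  // 1s
    	fmt.Println(tunedConfig(500).LeavePropagateDelay) // 5s
    }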
@@ -223,13 +223,16 @@ func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
 		d.serf.queryClock.Witness(pp.QueryLTime - 1)
 	}
 
-	// Process the left nodes first to avoid the LTimes from being increment
-	// in the wrong order
+	// Process the left nodes first to avoid the LTimes from incrementing
+	// in the wrong order. Note that we don't have the actual Lamport time
+	// for the leave message, so we go one past the join time, since the
+	// leave must have been accepted after that to get onto the left members
+	// list. If we didn't do this then the message would not get processed.
 	leftMap := make(map[string]struct{}, len(pp.LeftMembers))
 	leave := messageLeave{}
 	for _, name := range pp.LeftMembers {
 		leftMap[name] = struct{}{}
-		leave.LTime = pp.StatusLTimes[name]
+		leave.LTime = pp.StatusLTimes[name] + 1
 		leave.Node = name
 		d.serf.handleNodeLeaveIntent(&leave)
 	}
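The extra +1 is the heart of the fix: Serf drops a leave intent whose Lamport time is not newer than the status time already recorded for that member, and the merged state only carries the join time. A toy model of that ordering rule; this is a simplification, the real check lives in handleNodeLeaveIntent:

    package main

    import "fmt"

    // LamportTime mirrors Serf's logical clock value.
    type LamportTime uint64

    // member holds the last status time witnessed for a node.
    type member struct {
    	name       string
    	statusTime LamportTime
    }

    // applyLeave mimics the ordering rule: an intent at or below the
    // recorded status time is stale and gets dropped.
    func applyLeave(m *member, intentTime LamportTime) bool {
    	if intentTime <= m.statusTime {
    		return false // stale, ignored
    	}
    	m.statusTime = intentTime
    	return true
    }

    func main() {
    	m := &member{name: "node-a", statusTime: 7} // joined at LTime 7

    	// The old merge code replayed the leave at the join time: dropped.
    	fmt.Println(applyLeave(m, 7)) // false

    	// The fix replays it one past the join time, so it is accepted.
    	fmt.Println(applyLeave(m, 7+1)) // true
    }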
@@ -691,6 +691,13 @@ func (s *Serf) Leave() error {
 		return err
 	}
 
+	// Wait for the leave to propagate through the cluster. The broadcast
+	// timeout is how long we wait for the message to go out from our own
+	// queue, but this wait is for that message to propagate through the
+	// cluster. In particular, we want to stay up long enough to service
+	// any probes from other nodes before they learn about us leaving.
+	time.Sleep(s.config.LeavePropagateDelay)
+
 	// Transition to Left only if we not already shutdown
 	s.stateLock.Lock()
 	if s.state != SerfShutdown {
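From the caller's side the change is transparent: a graceful exit still calls Leave before Shutdown, and the new propagation wait happens inside Leave itself. A minimal sketch, assuming a default config and abbreviated error handling:

    package main

    import (
    	"log"

    	"github.com/hashicorp/serf/serf"
    )

    func main() {
    	s, err := serf.Create(serf.DefaultConfig())
    	if err != nil {
    		log.Fatal(err)
    	}

    	// Leave broadcasts the intent, waits up to BroadcastTimeout for it
    	// to drain from our own queue, then (with this change) sleeps
    	// LeavePropagateDelay so other nodes stop probing us before we exit.
    	if err := s.Leave(); err != nil {
    		log.Fatal(err)
    	}
    	if err := s.Shutdown(); err != nil {
    		log.Fatal(err)
    	}
    }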
@@ -180,8 +180,8 @@
 	{"path":"github.com/hashicorp/net-rpc-msgpackrpc","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3"},
 	{"path":"github.com/hashicorp/raft","checksumSHA1":"zkA9uvbj1BdlveyqXpVTh1N6ers=","revision":"077966dbc90f342107eb723ec52fdb0463ec789b","revisionTime":"2018-01-17T20:29:25Z","version":"master","versionExact":"master"},
 	{"path":"github.com/hashicorp/raft-boltdb","checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"},
-	{"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"0PeWsO2aI+2PgVYlYlDPKfzCLEQ=","revision":"fc4bdedf2366c64984e280c6eefc703ca7812585","revisionTime":"2018-04-11T17:01:37Z"},
-	{"path":"github.com/hashicorp/serf/serf","checksumSHA1":"YzJaaeIJpxLfVDZYT1X2hpd8IK8=","revision":"fc4bdedf2366c64984e280c6eefc703ca7812585","revisionTime":"2018-04-11T17:01:37Z"},
+	{"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"0PeWsO2aI+2PgVYlYlDPKfzCLEQ=","revision":"80ab48778deee28e4ea2dc4ef1ebb2c5f4063996","revisionTime":"2018-05-07T23:19:28Z"},
+	{"path":"github.com/hashicorp/serf/serf","checksumSHA1":"QrT+nzyXsD/MmhTjjhcPdnALZ1I=","revision":"80ab48778deee28e4ea2dc4ef1ebb2c5f4063996","revisionTime":"2018-05-07T23:19:28Z"},
 	{"path":"github.com/hashicorp/vault","checksumSHA1":"eGzvBRMFD6ZB3A6uO750np7Om/E=","revision":"182ba68a9589d4cef95234134aaa498a686e3de3","revisionTime":"2016-08-21T23:40:57Z"},
 	{"path":"github.com/hashicorp/vault/api","checksumSHA1":"mKN4rEIWyflT6aqJyjgu9m1tPXI=","revision":"3ddd3bd20cec0588788547aecd15e91461b9d546","revisionTime":"2018-04-03T21:11:47Z"},
 	{"path":"github.com/hashicorp/vault/helper/compressutil","checksumSHA1":"jHVLe8KMdEpb/ZALp0zu+tenADo=","revision":"3ddd3bd20cec0588788547aecd15e91461b9d546","revisionTime":"2018-04-03T21:11:47Z"},