Adds timeout and waits for feedback when asking the leader loop to reassert.
This adds on to the fix in #3004 for issue #2980.
This commit is contained in:
parent
1be6e3cf80
commit
9c3abd33c3
|
@ -76,6 +76,19 @@ func (s *Server) leaderLoop(stopCh chan struct{}) {
|
|||
var reconcileCh chan serf.Member
|
||||
establishedLeader := false
|
||||
|
||||
reassert := func() error {
|
||||
if !establishedLeader {
|
||||
return fmt.Errorf("leadership has not been established")
|
||||
}
|
||||
if err := s.revokeLeadership(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.establishLeadership(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
RECONCILE:
|
||||
// Setup a reconciliation timer
|
||||
reconcileCh = nil
|
||||
|
@ -125,18 +138,8 @@ WAIT:
|
|||
s.reconcileMember(member)
|
||||
case index := <-s.tombstoneGC.ExpireCh():
|
||||
go s.reapTombstones(index)
|
||||
case <-s.reassertLeaderCh:
|
||||
if !establishedLeader {
|
||||
continue
|
||||
}
|
||||
if err := s.revokeLeadership(); err != nil {
|
||||
s.logger.Printf("[ERR] consul: failed to revoke leadership: %v", err)
|
||||
continue
|
||||
}
|
||||
if err := s.establishLeadership(); err != nil {
|
||||
s.logger.Printf("[ERR] consul: failed to re-establish leadership: %v", err)
|
||||
continue
|
||||
}
|
||||
case errCh := <-s.reassertLeaderCh:
|
||||
errCh <- reassert()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -176,7 +176,7 @@ type Server struct {
|
|||
|
||||
// reassertLeaderCh is used to signal the leader loop should re-run
|
||||
// leadership actions after a snapshot restore.
|
||||
reassertLeaderCh chan struct{}
|
||||
reassertLeaderCh chan chan error
|
||||
|
||||
// tombstoneGC is used to track the pending GC invocations
|
||||
// for the KV tombstones
|
||||
|
@ -270,7 +270,7 @@ func NewServer(config *Config) (*Server, error) {
|
|||
router: servers.NewRouter(logger, shutdownCh, config.Datacenter),
|
||||
rpcServer: rpc.NewServer(),
|
||||
rpcTLS: incomingTLS,
|
||||
reassertLeaderCh: make(chan struct{}),
|
||||
reassertLeaderCh: make(chan chan error),
|
||||
tombstoneGC: gc,
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
|
|
|
@ -101,10 +101,34 @@ func (s *Server) dispatchSnapshotRequest(args *structs.SnapshotRequest, in io.Re
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// This'll be used for feedback from the leader loop.
|
||||
errCh := make(chan error, 1)
|
||||
timeoutCh := time.After(time.Minute)
|
||||
|
||||
select {
|
||||
// Tell the leader loop to reassert leader actions since we just
|
||||
// replaced the state store contents.
|
||||
case s.reassertLeaderCh <- struct{}{}:
|
||||
case s.reassertLeaderCh <- errCh:
|
||||
|
||||
// We might have lost leadership while waiting to kick the loop.
|
||||
case <-timeoutCh:
|
||||
return nil, fmt.Errorf("timed out waiting to re-run leader actions")
|
||||
|
||||
// Make sure we don't get stuck during shutdown
|
||||
case <-s.shutdownCh:
|
||||
}
|
||||
|
||||
select {
|
||||
// Wait for the leader loop to finish up.
|
||||
case err := <-errCh:
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We might have lost leadership while the loop was doing its
|
||||
// thing.
|
||||
case <-timeoutCh:
|
||||
return nil, fmt.Errorf("timed out waiting for re-run of leader actions")
|
||||
|
||||
// Make sure we don't get stuck during shutdown
|
||||
case <-s.shutdownCh:
|
||||
|
|
Loading…
Reference in New Issue