server: transfer leadership in case of error (#12293)

When a Nomad server becomes the Raft leader, it must perform several
actions defined in the establishLeadership function. If any of these
actions fail, Raft will think the node is the leader, but it will not
actually be able to act as a Nomad leader.

In this scenario, leadership must be revoked and transferred to another
server if possible, or the node should retry the establishLeadership
steps.
This commit is contained in:
Luiz Aoqui 2022-03-17 11:10:57 -04:00 committed by GitHub
parent 91e4d20b1d
commit 8db12c2a17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 2 deletions

3
.changelog/12293.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
server: Transfer Raft leadership in case the Nomad server fails to establish leadership
```

View File

@ -120,6 +120,30 @@ func (s *Server) monitorLeadership() {
}
}
// leadershipTransfer asks Raft to hand leadership over to another server,
// making up to three attempts.
//
// It returns nil as soon as one attempt succeeds. If Raft reports
// raft.ErrUnsupportedProtocol the function gives up immediately, since
// retrying cannot succeed. If every attempt fails, the returned error wraps
// the error from the last attempt so callers can log or inspect the cause.
func (s *Server) leadershipTransfer() error {
	const retryCount = 3

	// lastErr keeps the most recent failure so it can be surfaced to the
	// caller once all attempts are exhausted.
	var lastErr error

	for attempt := 1; attempt <= retryCount; attempt++ {
		err := s.raft.LeadershipTransfer().Error()
		if err == nil {
			s.logger.Info("successfully transferred leadership")
			return nil
		}

		// Don't retry if the Raft version doesn't support leadership transfer
		// since this will never succeed.
		if err == raft.ErrUnsupportedProtocol {
			return fmt.Errorf("leadership transfer not supported with Raft version lower than 3")
		}

		lastErr = err

		// Report a 1-based attempt number so it reads naturally next to
		// retry_limit (1 of 3, 2 of 3, 3 of 3).
		s.logger.Error("failed to transfer leadership attempt, will retry",
			"attempt", attempt,
			"retry_limit", retryCount,
			"error", err,
		)
	}

	return fmt.Errorf("failed to transfer leadership in %d attempts: %w", retryCount, lastErr)
}
// leaderLoop runs as long as we are the leader to run various
// maintenance activities
func (s *Server) leaderLoop(stopCh chan struct{}) {
@ -151,7 +175,15 @@ RECONCILE:
s.logger.Error("failed to revoke leadership", "error", err)
}
goto WAIT
// Attempt to transfer leadership. If successful, leave the
// leaderLoop since this node is no longer the leader. Otherwise
// try to establish leadership again after 5 seconds.
if err := s.leadershipTransfer(); err != nil {
s.logger.Error("failed to transfer leadership", "error", err)
interval = time.After(5 * time.Second)
goto WAIT
}
return
}
establishedLeader = true
@ -182,10 +214,12 @@ RECONCILE:
}
WAIT:
// Wait until leadership is lost
// Wait until leadership is lost or periodically reconcile as long as we
// are the leader, or when Serf events arrive.
for {
select {
case <-stopCh:
// Lost leadership.
return
case <-s.shutdownCh:
return
@ -213,6 +247,27 @@ WAIT:
s.revokeLeadership()
err := s.establishLeadership(stopCh)
errCh <- err
// In case establishLeadership fails, try to transfer leadership.
// At this point Raft thinks we are the leader, but Nomad did not
// complete the required steps to act as the leader.
if err != nil {
if err := s.leadershipTransfer(); err != nil {
// establishedLeader was true before, but it no longer is
// since we revoked leadership and leadershipTransfer also
// failed.
// Stay in the leaderLoop with establishedLeader set to
// false so we try to establish leadership again in the
// next loop.
establishedLeader = false
interval = time.After(5 * time.Second)
goto WAIT
}
// leadershipTransfer was successful and it is
// time to leave the leaderLoop.
return
}
}
}
}

View File

@ -51,6 +51,14 @@ The volume staging directory for new CSI plugin tasks will now be
mounted to the task's `NOMAD_TASK_DIR` instead of the
`csi_plugin.mount_config`.
#### Raft leadership transfer on error
Starting with Nomad 1.3.0, when a Nomad server is elected the Raft leader but
fails to complete the process to start acting as the Nomad leader, it will
attempt to gracefully transfer its Raft leadership status to another eligible
server in the cluster. This operation is only supported when using Raft
Protocol Version 3.
#### Server Raft Database
The server raft database in `raft.db` will be automatically migrated to a new