replication: fix potential panic during upgrades (#17476)
If the authoritative region has been upgraded to a version of Nomad that has new replicated objects (such as ACL Auth Methods, ACL Binding Rules, etc.), the non-authoritative regions will start replicating those objects as soon as their leader is upgraded. If a server in the non-authoritative region is upgraded and then becomes the leader before all the other servers in the region have been upgraded, then it will attempt to write a Raft log entry that the followers don't understand. The followers will then panic. Add the same minimum version checks that we do for RPC writes to the leader's replication loop.
This commit is contained in:
parent
8bd3bdab42
commit
e3a37c0b97
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
replication: Fix a potential panic when a non-authoritative region is upgraded and a server with the new version becomes the leader.
|
||||
```
|
|
@ -58,7 +58,7 @@ var minACLRoleVersion = version.Must(version.NewVersion("1.4.0"))
|
|||
// minACLAuthMethodVersion is the Nomad version at which the ACL auth methods
|
||||
// table was introduced. It forms the minimum version all federated servers must
|
||||
// meet before the feature can be used.
|
||||
var minACLAuthMethodVersion = version.Must(version.NewVersion("1.5.0-beta.1"))
|
||||
var minACLAuthMethodVersion = version.Must(version.NewVersion("1.5.0"))
|
||||
|
||||
// minACLJWTAuthMethodVersion is the Nomad version at which the ACL JWT auth method type
|
||||
// was introduced. It forms the minimum version all federated servers must
|
||||
|
@ -68,7 +68,7 @@ var minACLJWTAuthMethodVersion = version.Must(version.NewVersion("1.5.4"))
|
|||
// minACLBindingRuleVersion is the Nomad version at which the ACL binding rules
|
||||
// table was introduced. It forms the minimum version all federated servers
|
||||
// must meet before the feature can be used.
|
||||
var minACLBindingRuleVersion = version.Must(version.NewVersion("1.5.0-beta.1"))
|
||||
var minACLBindingRuleVersion = version.Must(version.NewVersion("1.5.0"))
|
||||
|
||||
// minNomadServiceRegistrationVersion is the Nomad version at which the service
|
||||
// registrations table was introduced. It forms the minimum version all local
|
||||
|
@ -1848,6 +1848,17 @@ func (s *Server) replicateACLRoles(stopCh chan struct{}) {
|
|||
// parameters are controlled internally.
|
||||
_ = limiter.Wait(context.Background())
|
||||
|
||||
if !ServersMeetMinimumVersion(
|
||||
s.serf.Members(), s.Region(), minACLRoleVersion, true) {
|
||||
s.logger.Trace(
|
||||
"all servers must be upgraded to 1.4.0 or later before ACL Roles can be replicated")
|
||||
if s.replicationBackoffContinue(stopCh) {
|
||||
continue
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Set the replication token on each replication iteration so that
|
||||
// it is always current and can handle agent SIGHUP reloads.
|
||||
req.AuthToken = s.ReplicationToken()
|
||||
|
@ -2046,6 +2057,17 @@ func (s *Server) replicateACLAuthMethods(stopCh chan struct{}) {
|
|||
// parameters are controlled internally.
|
||||
_ = limiter.Wait(context.Background())
|
||||
|
||||
if !ServersMeetMinimumVersion(
|
||||
s.serf.Members(), s.Region(), minACLAuthMethodVersion, true) {
|
||||
s.logger.Trace(
|
||||
"all servers must be upgraded to 1.5.0 or later before ACL Auth Methods can be replicated")
|
||||
if s.replicationBackoffContinue(stopCh) {
|
||||
continue
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Set the replication token on each replication iteration so that
|
||||
// it is always current and can handle agent SIGHUP reloads.
|
||||
req.AuthToken = s.ReplicationToken()
|
||||
|
@ -2241,6 +2263,17 @@ func (s *Server) replicateACLBindingRules(stopCh chan struct{}) {
|
|||
// parameters are controlled internally.
|
||||
_ = limiter.Wait(context.Background())
|
||||
|
||||
if !ServersMeetMinimumVersion(
|
||||
s.serf.Members(), s.Region(), minACLBindingRuleVersion, true) {
|
||||
s.logger.Trace(
|
||||
"all servers must be upgraded to 1.5.0 or later before ACL Binding Rules can be replicated")
|
||||
if s.replicationBackoffContinue(stopCh) {
|
||||
continue
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Set the replication token on each replication iteration so that
|
||||
// it is always current and can handle agent SIGHUP reloads.
|
||||
req.AuthToken = s.ReplicationToken()
|
||||
|
|
Loading…
Reference in New Issue