fix panic from keyring raft entries being written during upgrade (#14821)
During an upgrade to Nomad 1.4.0, if a server running 1.4.0 becomes the leader while 1.3.x servers are still members of the cluster, the new leader initializes the keyring and writes a raft entry that the older servers cannot apply, causing them to crash. Instead, wait until all members are on a version that supports the keyring before initializing it.
parent 36c644aaf2
commit 80ec5e1346
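For readers skimming the diff, here is a minimal, self-contained sketch of the gating pattern this change introduces: the leader keeps checking cluster membership and only proceeds with keyring bootstrap once every server reports 1.4.0 or newer, bailing out if asked to stop. It assumes only the hashicorp/go-version library; `membersMeetMinimum` and `waitForKeyringSupport` are hypothetical stand-ins for Nomad's `ServersMeetMinimumVersion` and the loop added to `initializeKeyring` below, and the one-second poll is purely illustrative.

```go
package main

import (
	"fmt"
	"time"

	version "github.com/hashicorp/go-version"
)

// minVersionKeyring mirrors the constant added in the diff below: servers
// older than 1.4.0 cannot apply keyring raft entries, so the leader must
// not write them until everyone has upgraded.
var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))

// membersMeetMinimum is a hypothetical stand-in for Nomad's
// ServersMeetMinimumVersion: true only if every server is at or above min.
func membersMeetMinimum(members []*version.Version, min *version.Version) bool {
	for _, v := range members {
		if v.LessThan(min) {
			return false
		}
	}
	return true
}

// waitForKeyringSupport blocks until all members support the keyring or
// stopCh closes (e.g. on loss of leadership), mirroring the shape of the
// loop added to initializeKeyring.
func waitForKeyringSupport(stopCh <-chan struct{}, members func() []*version.Version) bool {
	for {
		select {
		case <-stopCh:
			return false
		default:
		}
		if membersMeetMinimum(members(), minVersionKeyring) {
			return true
		}
		time.Sleep(time.Second) // illustrative poll interval, not the real cadence
	}
}

func main() {
	// A mixed-version cluster: the 1.3.x member keeps the gate closed.
	mixed := []*version.Version{
		version.Must(version.NewVersion("1.4.0")),
		version.Must(version.NewVersion("1.3.5")),
	}
	fmt.Println(membersMeetMinimum(mixed, minVersionKeyring)) // false

	// Once every member reports 1.4.0 or newer, the gate opens and the
	// keyring bootstrap entry can safely be written to raft.
	upgraded := []*version.Version{
		version.Must(version.NewVersion("1.4.0")),
		version.Must(version.NewVersion("1.4.0")),
	}
	stopCh := make(chan struct{})
	fmt.Println(waitForKeyringSupport(stopCh, func() []*version.Version { return upgraded })) // true
}
```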
@@ -0,0 +1,3 @@
+```release-note:bug
+keyring: Fixed a panic that can occur during upgrades to 1.4.0 when initializing the keyring
+```
@@ -277,6 +277,9 @@ func (e *Encrypter) activeKeySetLocked() (*keyset, error) {
 	if err != nil {
 		return nil, err
 	}
+	if keyMeta == nil {
+		return nil, fmt.Errorf("keyring has not been initialized yet")
+	}
 
 	return e.keysetByIDLocked(keyMeta.KeyID)
 }
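As an aside, a small illustration (plain Go with a hypothetical keyMeta type, not Nomad's structs) of why the nil check added above matters: before the keyring has been bootstrapped, the active-key lookup returns no metadata, and dereferencing that nil result is a panic, whereas returning an error keeps "not initialized yet" a recoverable condition for callers.

```go
package main

import (
	"errors"
	"fmt"
)

// keyMeta is a hypothetical stand-in for the root key metadata record.
type keyMeta struct{ KeyID string }

// activeKeyID returns the active key's ID, treating "keyring not yet
// initialized" (a nil record) as an error instead of dereferencing nil.
func activeKeyID(meta *keyMeta) (string, error) {
	if meta == nil {
		return "", errors.New("keyring has not been initialized yet")
	}
	return meta.KeyID, nil
}

func main() {
	// Before initialization the lookup yields nil; the guard turns what
	// would have been a nil-pointer panic into an error the caller can handle.
	if _, err := activeKeyID(nil); err != nil {
		fmt.Println("recoverable:", err)
	}

	id, _ := activeKeyID(&keyMeta{KeyID: "example-key-id"})
	fmt.Println("active key:", id)
}
```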
@@ -294,10 +294,7 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
 	schedulerConfig := s.getOrCreateSchedulerConfig()
 
 	// Create the first root key if it doesn't already exist
-	err := s.initializeKeyring()
-	if err != nil {
-		return err
-	}
+	go s.initializeKeyring(stopCh)
 
 	// Initialize the ClusterID
 	_, _ = s.ClusterID()
@@ -1966,43 +1963,66 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration {
 	return config
 }
 
+var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))
+
 // initializeKeyring creates the first root key if the leader doesn't
 // already have one. The metadata will be replicated via raft and then
 // the followers will get the key material from their own key
 // replication.
-func (s *Server) initializeKeyring() error {
+func (s *Server) initializeKeyring(stopCh <-chan struct{}) {
+
+	logger := s.logger.Named("keyring")
+
 	store := s.fsm.State()
 	keyMeta, err := store.GetActiveRootKeyMeta(nil)
 	if err != nil {
-		return err
+		logger.Error("failed to get active key: %v", err)
+		return
 	}
 	if keyMeta != nil {
-		return nil
+		return
 	}
 
-	s.logger.Named("core").Trace("initializing keyring")
+	logger.Trace("verifying cluster is ready to initialize keyring")
+	for {
+		select {
+		case <-stopCh:
+			return
+		default:
+		}
+		if ServersMeetMinimumVersion(s.serf.Members(), minVersionKeyring, true) {
+			break
+		}
+	}
+	// we might have lost leadership during the version check
+	if !s.IsLeader() {
+		return
+	}
+
+	logger.Trace("initializing keyring")
 
 	rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM)
 	rootKey.Meta.SetActive()
 	if err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
 	err = s.encrypter.AddKey(rootKey)
 	if err != nil {
-		return fmt.Errorf("could not add initial key to keyring: %v", err)
+		logger.Error("could not add initial key to keyring: %v", err)
+		return
 	}
 
 	if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType,
 		structs.KeyringUpdateRootKeyMetaRequest{
 			RootKeyMeta: rootKey.Meta,
 		}); err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
-	s.logger.Named("core").Info("initialized keyring", "id", rootKey.Meta.KeyID)
-	return nil
+	logger.Info("initialized keyring", "id", rootKey.Meta.KeyID)
 }
 
 func (s *Server) generateClusterID() (string, error) {
@@ -243,7 +243,7 @@ func TestPlanApply_applyPlanWithNormalizedAllocs(t *testing.T) {
 	ci.Parallel(t)
 
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)
@@ -488,7 +488,7 @@ func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) {
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
 		c.NumSchedulers = 0
 		c.EnabledSchedulers = []string{structs.JobTypeService}
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)