fix panic from keyring raft entries being written during upgrade (#14821)

During an upgrade to Nomad 1.4.0, if a server running 1.4.0 becomes the leader
while 1.3.x servers are still in the cluster, the new leader initializes the
keyring and writes a raft entry that the 1.3.x servers cannot apply, causing
them to crash.

Wait until all members are on a version that supports the keyring before
initializing it.
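A minimal sketch of that gating idea, separate from the diff itself: poll the cluster membership until every server reports a build that understands keyring raft entries, and give up early if leadership is lost. The helper name, the injected `memberVersions` callback, and the one-second poll interval are illustrative only; the actual change relies on Nomad's existing `ServersMeetMinimumVersion` check and re-checks in a loop without sleeping, as shown in the `initializeKeyring` hunk below.

```go
package main

import (
	"fmt"
	"time"

	version "github.com/hashicorp/go-version"
)

// minVersionKeyring is the first release that can apply keyring raft entries;
// servers older than this crash when they see a RootKeyMeta log entry.
var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))

// waitForKeyringSupport polls memberVersions until every server in the
// cluster is at least 1.4.0, or gives up when stopCh closes (for example
// because leadership was lost). memberVersions stands in for reading the
// build tag from serf members, as the real server does.
func waitForKeyringSupport(memberVersions func() []*version.Version, stopCh <-chan struct{}) bool {
	for {
		select {
		case <-stopCh:
			return false
		case <-time.After(time.Second): // illustrative; the actual change re-checks immediately
		}

		allSupport := true
		for _, v := range memberVersions() {
			if v.LessThan(minVersionKeyring) {
				allSupport = false
				break
			}
		}
		if allSupport {
			return true
		}
	}
}

func main() {
	members := func() []*version.Version {
		return []*version.Version{
			version.Must(version.NewVersion("1.4.0")),
			version.Must(version.NewVersion("1.3.5")), // not upgraded yet: keep waiting
		}
	}
	stop := make(chan struct{})
	go func() { time.Sleep(3 * time.Second); close(stop) }()
	fmt.Println("keyring safe to initialize:", waitForKeyringSupport(members, stop))
}
```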
commit 80ec5e1346 (parent 36c644aaf2)
Author: Tim Gross
Date:   2022-10-06 12:47:02 -04:00 (committed by GitHub)
5 changed files with 41 additions and 15 deletions

.changelog/14821.txt (new file, +3)

@@ -0,0 +1,3 @@
```release-note:bug
keyring: Fixed a panic that can occur during upgrades to 1.4.0 when initializing the keyring
```


@@ -277,6 +277,9 @@ func (e *Encrypter) activeKeySetLocked() (*keyset, error) {
 	if err != nil {
 		return nil, err
 	}
+	if keyMeta == nil {
+		return nil, fmt.Errorf("keyring has not been initialized yet")
+	}
 	return e.keysetByIDLocked(keyMeta.KeyID)
 }
 


@@ -294,10 +294,7 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
 	schedulerConfig := s.getOrCreateSchedulerConfig()
 
 	// Create the first root key if it doesn't already exist
-	err := s.initializeKeyring()
-	if err != nil {
-		return err
-	}
+	go s.initializeKeyring(stopCh)
 
 	// Initialize the ClusterID
 	_, _ = s.ClusterID()
@@ -1966,43 +1963,66 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration {
 	return config
 }
 
+var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))
+
 // initializeKeyring creates the first root key if the leader doesn't
 // already have one. The metadata will be replicated via raft and then
 // the followers will get the key material from their own key
 // replication.
-func (s *Server) initializeKeyring() error {
+func (s *Server) initializeKeyring(stopCh <-chan struct{}) {
+	logger := s.logger.Named("keyring")
+
 	store := s.fsm.State()
 	keyMeta, err := store.GetActiveRootKeyMeta(nil)
 	if err != nil {
-		return err
+		logger.Error("failed to get active key: %v", err)
+		return
 	}
 	if keyMeta != nil {
-		return nil
+		return
 	}
 
-	s.logger.Named("core").Trace("initializing keyring")
+	logger.Trace("verifying cluster is ready to initialize keyring")
+	for {
+		select {
+		case <-stopCh:
+			return
+		default:
+		}
+		if ServersMeetMinimumVersion(s.serf.Members(), minVersionKeyring, true) {
+			break
+		}
+	}
+
+	// we might have lost leadership during the version check
+	if !s.IsLeader() {
+		return
+	}
+
+	logger.Trace("initializing keyring")
 
 	rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM)
 	rootKey.Meta.SetActive()
 	if err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
 	err = s.encrypter.AddKey(rootKey)
 	if err != nil {
-		return fmt.Errorf("could not add initial key to keyring: %v", err)
+		logger.Error("could not add initial key to keyring: %v", err)
+		return
 	}
 
 	if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType,
 		structs.KeyringUpdateRootKeyMetaRequest{
 			RootKeyMeta: rootKey.Meta,
 		}); err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
-	s.logger.Named("core").Info("initialized keyring", "id", rootKey.Meta.KeyID)
-	return nil
+	logger.Info("initialized keyring", "id", rootKey.Meta.KeyID)
 }
 
 func (s *Server) generateClusterID() (string, error) {


@@ -243,7 +243,7 @@ func TestPlanApply_applyPlanWithNormalizedAllocs(t *testing.T) {
 	ci.Parallel(t)
 
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)


@@ -488,7 +488,7 @@ func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) {
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
 		c.NumSchedulers = 0
 		c.EnabledSchedulers = []string{structs.JobTypeService}
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)
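
Because the first root key is now written from a background goroutine rather than inline in establishLeadership, a test that depends on the keyring can no longer assume the key exists as soon as a leader is elected. A hedged sketch of a helper such a test might use, assuming it lives in the nomad package where *Server, its FSM, and the testutil import are already available; waitForActiveRootKey itself is illustrative and not an existing Nomad helper, while GetActiveRootKeyMeta and testutil.WaitForResult are calls that appear in this diff and in the existing test suite respectively.

```go
// waitForActiveRootKey is an illustrative test helper (not part of Nomad's
// test utilities): it blocks until the background initializeKeyring
// goroutine has written the first root key metadata to the state store.
func waitForActiveRootKey(t *testing.T, s *Server) {
	t.Helper()
	testutil.WaitForResult(func() (bool, error) {
		keyMeta, err := s.fsm.State().GetActiveRootKeyMeta(nil)
		if err != nil {
			return false, err
		}
		// keep polling until the keyring has been initialized
		return keyMeta != nil, nil
	}, func(err error) {
		t.Fatalf("keyring was not initialized: %v", err)
	})
}
```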