fix panic from keyring raft entries being written during upgrade (#14821)
During an upgrade to Nomad 1.4.0, if a server running 1.4.0 becomes the leader while 1.3.x servers are still in the cluster, the new leader initializes the keyring and writes a raft entry that the 1.3.x servers cannot apply, causing them to panic. The leader now waits until all members are on a version that supports the keyring before initializing it.
commit 80ec5e1346 (parent 36c644aaf2)
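To make the ordering concrete, here is a minimal, self-contained sketch of the gating pattern this commit introduces: keyring initialization runs in the background and only writes the first root key once every member reports a keyring-capable version and the server still holds leadership. The hooks `memberVersions`, `isLeader`, and `writeRootKey`, and the one-second polling interval, are illustrative assumptions for this sketch, not Nomad's internals; the real change uses `ServersMeetMinimumVersion` and the server's encrypter, as shown in the diff below.

```go
// Sketch only: hypothetical stand-ins for Nomad's server internals.
package main

import (
	"fmt"
	"time"

	version "github.com/hashicorp/go-version"
)

// minVersionKeyring mirrors the constant added in this commit: servers below
// this version cannot decode keyring raft entries.
var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))

// initializeKeyringWhenReady waits until every member is keyring-capable and
// the caller still holds leadership before writing the first root key.
// memberVersions, isLeader, and writeRootKey are hypothetical hooks.
func initializeKeyringWhenReady(
	stopCh <-chan struct{},
	memberVersions func() []*version.Version,
	isLeader func() bool,
	writeRootKey func() error,
) {
	for {
		select {
		case <-stopCh:
			return // shutdown or leadership transition
		case <-time.After(1 * time.Second):
		}
		ready := true
		for _, v := range memberVersions() {
			if v.LessThan(minVersionKeyring) {
				ready = false
				break
			}
		}
		if ready {
			break
		}
	}
	// Leadership may have been lost while waiting for the upgrade to finish.
	if !isLeader() {
		return
	}
	if err := writeRootKey(); err != nil {
		fmt.Println("could not initialize keyring:", err)
	}
}

func main() {
	stopCh := make(chan struct{})
	defer close(stopCh)

	members := func() []*version.Version {
		return []*version.Version{version.Must(version.NewVersion("1.4.0"))}
	}
	done := make(chan struct{})
	go func() {
		initializeKeyringWhenReady(stopCh, members,
			func() bool { return true },
			func() error { fmt.Println("wrote initial root key"); return nil })
		close(done)
	}()
	<-done
}
```

Running this off the leadership path matters because the wait can span an entire rolling upgrade; blocking leadership establishment on it would stall the rest of leader setup, which is why the diff below launches the real initializer with `go s.initializeKeyring(stopCh)`.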
@@ -0,0 +1,3 @@
+```release-note:bug
+keyring: Fixed a panic that can occur during upgrades to 1.4.0 when initializing the keyring
+```
@@ -277,6 +277,9 @@ func (e *Encrypter) activeKeySetLocked() (*keyset, error) {
 	if err != nil {
 		return nil, err
 	}
+	if keyMeta == nil {
+		return nil, fmt.Errorf("keyring has not been initialized yet")
+	}
 
 	return e.keysetByIDLocked(keyMeta.KeyID)
 }
@@ -294,10 +294,7 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
 	schedulerConfig := s.getOrCreateSchedulerConfig()
 
 	// Create the first root key if it doesn't already exist
-	err := s.initializeKeyring()
-	if err != nil {
-		return err
-	}
+	go s.initializeKeyring(stopCh)
 
 	// Initialize the ClusterID
 	_, _ = s.ClusterID()
@@ -1966,43 +1963,66 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration {
 	return config
 }
 
+var minVersionKeyring = version.Must(version.NewVersion("1.4.0"))
+
 // initializeKeyring creates the first root key if the leader doesn't
 // already have one. The metadata will be replicated via raft and then
 // the followers will get the key material from their own key
 // replication.
-func (s *Server) initializeKeyring() error {
+func (s *Server) initializeKeyring(stopCh <-chan struct{}) {
 
+	logger := s.logger.Named("keyring")
+
 	store := s.fsm.State()
 	keyMeta, err := store.GetActiveRootKeyMeta(nil)
 	if err != nil {
-		return err
+		logger.Error("failed to get active key: %v", err)
+		return
 	}
 	if keyMeta != nil {
-		return nil
+		return
 	}
 
-	s.logger.Named("core").Trace("initializing keyring")
+	logger.Trace("verifying cluster is ready to initialize keyring")
+	for {
+		select {
+		case <-stopCh:
+			return
+		default:
+		}
+		if ServersMeetMinimumVersion(s.serf.Members(), minVersionKeyring, true) {
+			break
+		}
+	}
+	// we might have lost leadership during the version check
+	if !s.IsLeader() {
+		return
+	}
+
+	logger.Trace("initializing keyring")
 
 	rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM)
 	rootKey.Meta.SetActive()
 	if err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
 	err = s.encrypter.AddKey(rootKey)
 	if err != nil {
-		return fmt.Errorf("could not add initial key to keyring: %v", err)
+		logger.Error("could not add initial key to keyring: %v", err)
+		return
 	}
 
 	if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType,
 		structs.KeyringUpdateRootKeyMetaRequest{
 			RootKeyMeta: rootKey.Meta,
 		}); err != nil {
-		return fmt.Errorf("could not initialize keyring: %v", err)
+		logger.Error("could not initialize keyring: %v", err)
+		return
 	}
 
-	s.logger.Named("core").Info("initialized keyring", "id", rootKey.Meta.KeyID)
-	return nil
+	logger.Info("initialized keyring", "id", rootKey.Meta.KeyID)
 }
 
 func (s *Server) generateClusterID() (string, error) {
@@ -243,7 +243,7 @@ func TestPlanApply_applyPlanWithNormalizedAllocs(t *testing.T) {
 	ci.Parallel(t)
 
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)
@@ -488,7 +488,7 @@ func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) {
 	s1, cleanupS1 := TestServer(t, func(c *Config) {
 		c.NumSchedulers = 0
 		c.EnabledSchedulers = []string{structs.JobTypeService}
-		c.Build = "0.9.2"
+		c.Build = "1.4.0"
 	})
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)