Do not attempt to write a new TLS keyring at startup if raft is already set up (#17079)

This commit is contained in:
Nick Cabatoff 2022-09-09 12:19:57 -04:00 committed by GitHub
parent 5b5699e9b0
commit 3075c5bd65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 54 deletions

2
changelog/17079.txt Normal file
View File

@@ -0,0 +1,2 @@
```release-note:bug
storage/raft: Fix error writing raft TLS keyring during follower joins

View File

@@ -73,72 +73,74 @@ func (c *Core) startRaftBackend(ctx context.Context) (retErr error) {
return nil return nil
} }
// Retrieve the raft TLS information
raftTLSEntry, err := c.barrier.Get(ctx, raftTLSStoragePath)
if err != nil {
return err
}
var creating bool var creating bool
var raftTLS *raft.TLSKeyring var raftTLS *raft.TLSKeyring
switch raftTLSEntry { if !raftBackend.Initialized() {
case nil: // Retrieve the raft TLS information
// If this is HA-only and no TLS keyring is found, that means the raftTLSEntry, err := c.barrier.Get(ctx, raftTLSStoragePath)
// cluster has not been bootstrapped or joined. We return early here in
// this case. If we return here, the raft object has not been instantiated,
// and a bootstrap call should be made.
if c.isRaftHAOnly() {
c.logger.Trace("skipping raft backend setup during unseal, no bootstrap operation has been started yet")
return nil
}
// If we did not find a TLS keyring we will attempt to create one here.
// This happens after a storage migration process. This node is also
// marked to start as leader so we can write the new TLS Key. This is an
// error condition if there are already multiple nodes in the cluster,
// and the below storage write will fail. If the cluster is somehow in
// this state the unseal will fail and a cluster recovery will need to
// be done.
creating = true
raftTLSKey, err := raft.GenerateTLSKey(c.secureRandomReader)
if err != nil { if err != nil {
return err return err
} }
raftTLS = &raft.TLSKeyring{ switch raftTLSEntry {
Keys: []*raft.TLSKey{raftTLSKey}, case nil:
ActiveKeyID: raftTLSKey.ID, // If this is HA-only and no TLS keyring is found, that means the
// cluster has not been bootstrapped or joined. We return early here in
// this case. If we return here, the raft object has not been instantiated,
// and a bootstrap call should be made.
if c.isRaftHAOnly() {
c.logger.Trace("skipping raft backend setup during unseal, no bootstrap operation has been started yet")
return nil
}
// If we did not find a TLS keyring we will attempt to create one here.
// This happens after a storage migration process. This node is also
// marked to start as leader so we can write the new TLS Key. This is an
// error condition if there are already multiple nodes in the cluster,
// and the below storage write will fail. If the cluster is somehow in
// this state the unseal will fail and a cluster recovery will need to
// be done.
creating = true
raftTLSKey, err := raft.GenerateTLSKey(c.secureRandomReader)
if err != nil {
return err
}
raftTLS = &raft.TLSKeyring{
Keys: []*raft.TLSKey{raftTLSKey},
ActiveKeyID: raftTLSKey.ID,
}
default:
raftTLS = new(raft.TLSKeyring)
if err := raftTLSEntry.DecodeJSON(raftTLS); err != nil {
return err
}
} }
default:
raftTLS = new(raft.TLSKeyring) hasState, err := raftBackend.HasState()
if err := raftTLSEntry.DecodeJSON(raftTLS); err != nil { if err != nil {
return err return err
} }
}
hasState, err := raftBackend.HasState() // This can be hit on follower nodes that got their config updated to use
if err != nil { // raft for HA-only before they are joined to the cluster. Since followers
return err // in this case use shared storage, it doesn't return early from the TLS
} // case above, but there's not raft state yet for the backend to call
// raft.SetupCluster.
if !hasState {
c.logger.Trace("skipping raft backend setup during unseal, no raft state found")
return nil
}
// This can be hit on follower nodes that got their config updated to use raftBackend.SetRestoreCallback(c.raftSnapshotRestoreCallback(true, true))
// raft for HA-only before they are joined to the cluster. Since followers
// in this case use shared storage, it doesn't return early from the TLS
// case above, but there's not raft state yet for the backend to call
// raft.SetupCluster.
if !hasState {
c.logger.Trace("skipping raft backend setup during unseal, no raft state found")
return nil
}
raftBackend.SetRestoreCallback(c.raftSnapshotRestoreCallback(true, true)) if err := raftBackend.SetupCluster(ctx, raft.SetupOpts{
TLSKeyring: raftTLS,
if err := raftBackend.SetupCluster(ctx, raft.SetupOpts{ ClusterListener: c.getClusterListener(),
TLSKeyring: raftTLS, StartAsLeader: creating,
ClusterListener: c.getClusterListener(), }); err != nil {
StartAsLeader: creating, return err
}); err != nil { }
return err
} }
defer func() { defer func() {