ffa4825693
* Ensure correct write ordering in rebuildIssuersChains

When troubleshooting a recent migration failure from 1.10->1.11, it was noted that some PKI mounts had bad chain construction despite having valid, chaining issuers. Due to the cluster's leadership thrashing between nodes, the migration logic was re-executed several times, partially succeeding each time.

While the legacy CA bundle migration logic was written with this in mind, one shortcoming in the chain building code led us to truncate the ca_chain: by sorting the list of issuers after including not-yet-written issuers (with random IDs), these issuers would occasionally be persisted to storage _prior_ to existing CAs with modified chains.

The migration code carefully imported the active issuer prior to its parents. However, due to this bug, there was a chance that, if the write to the pending parent succeeded but the update to the active issuer didn't, the active issuer's ca_chain field would contain only the self-reference and not the parent's reference as well. Ultimately, a workaround of setting and subsequently unsetting a manual chain would force a chain regeneration.

In this patch, we simply fix the write ordering: because we need to ensure a stable chain sorting, we leave the sort location in the same place, but delay the write of the provided referenceCert to the last position. This is because the reference is meant to be the user-facing action: without transactional write capabilities, writes of the other chains may succeed, but if the last, user-facing write fails, the user will hopefully retry the action. This also corrects migration, by ensuring the subsequent issuer import will be attempted again, triggering another chain build and only persisting this issuer once all other issuers have also been updated.

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Remigrate ca_chains to fix any missing issuers

In the previous commit, we identified an issue that would occur on legacy issuer migration to the new storage format. This is easy enough to detect for any given mount (by an operator), but automating scanning and remediating all PKI mounts in large deployments might be difficult.

Write a new storage migration version to regenerate all chains on upgrade, once.

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Add changelog entry

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Add issue to PKI considerations documentation

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Correct %v -> %w in chain building errs

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
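The ordering change can be sketched roughly as follows. This is an illustrative snippet only, not the patch itself: the helper name persistRebuiltChains is invented here, and writeIssuer plus the surrounding types are assumptions about this package's storage helpers.

func persistRebuiltChains(sc *storageContext, reference *issuerEntry, others []*issuerEntry) error {
	// Persist every other affected issuer first. If one of these writes
	// fails, the user-facing operation below fails as well and can be
	// retried, triggering another chain build.
	for _, issuer := range others {
		if err := sc.writeIssuer(issuer); err != nil {
			return err
		}
	}

	// Write the issuer tied to the user-facing action last, so its updated
	// ca_chain is only persisted once all related issuers have been written.
	return sc.writeIssuer(reference)
}
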
215 lines
6.5 KiB
Go
package pki

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"time"

	"github.com/hashicorp/vault/sdk/helper/certutil"
	"github.com/hashicorp/vault/sdk/logical"
)

// This allows us to record the version of the migration code within the log entry
// in case we find out in the future that something was horribly wrong with the migration,
// and we need to perform it again...
const (
	latestMigrationVersion = 2
	legacyBundleShimID     = issuerID("legacy-entry-shim-id")
	legacyBundleShimKeyID  = keyID("legacy-entry-shim-key-id")
)
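
// Note: bumping latestMigrationVersion from 1 to 2 is what drives the one-time
// ca_chain remediation below. getMigrationInfo treats a stored log whose
// MigrationVersion differs from latestMigrationVersion as requiring migration,
// and migrateStorage rebuilds all issuer chains when it finds a version 1 log
// entry written by the earlier, incorrectly ordered chain building code.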

type legacyBundleMigrationLog struct {
	Hash             string    `json:"hash"`
	Created          time.Time `json:"created"`
	CreatedIssuer    issuerID  `json:"issuer_id"`
	CreatedKey       keyID     `json:"key_id"`
	MigrationVersion int       `json:"migrationVersion"`
}
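
// For illustration, with hypothetical values: the struct above is stored as
// JSON under legacyMigrationBundleLogKey, so a persisted entry looks roughly
// like:
//
//	{
//	  "hash": "9c56cc51...",
//	  "created": "2022-11-01T12:00:00Z",
//	  "issuer_id": "f2e1d0c9-...",
//	  "key_id": "a1b2c3d4-...",
//	  "migrationVersion": 2
//	}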

type migrationInfo struct {
	isRequired       bool
	legacyBundle     *certutil.CertBundle
	legacyBundleHash string
	migrationLog     *legacyBundleMigrationLog
}

func getMigrationInfo(ctx context.Context, s logical.Storage) (migrationInfo, error) {
	migrationInfo := migrationInfo{
		isRequired:       false,
		legacyBundle:     nil,
		legacyBundleHash: "",
		migrationLog:     nil,
	}

	var err error
	_, migrationInfo.legacyBundle, err = getLegacyCertBundle(ctx, s)
	if err != nil {
		return migrationInfo, err
	}

	migrationInfo.migrationLog, err = getLegacyBundleMigrationLog(ctx, s)
	if err != nil {
		return migrationInfo, err
	}

	migrationInfo.legacyBundleHash, err = computeHashOfLegacyBundle(migrationInfo.legacyBundle)
	if err != nil {
		return migrationInfo, err
	}

	// Even if there isn't anything to migrate, we always want to write out the log entry
	// as that will trigger the secondary clusters to toggle/wake up
	if (migrationInfo.migrationLog == nil) ||
		(migrationInfo.migrationLog.Hash != migrationInfo.legacyBundleHash) ||
		(migrationInfo.migrationLog.MigrationVersion != latestMigrationVersion) {
		migrationInfo.isRequired = true
	}

	return migrationInfo, nil
}

func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error {
	migrationInfo, err := getMigrationInfo(ctx, s)
	if err != nil {
		return err
	}

	if !migrationInfo.isRequired {
		// No migration was deemed to be required.
		return nil
	}

	var issuerIdentifier issuerID
	var keyIdentifier keyID
	sc := b.makeStorageContext(ctx, s)
	if migrationInfo.legacyBundle != nil {
		// Generate a unique name for the migrated items in case things were to be re-migrated again
		// for some weird reason in the future...
		migrationName := fmt.Sprintf("current-%d", time.Now().Unix())

		b.Logger().Info("performing PKI migration to new keys/issuers layout")
		anIssuer, aKey, err := sc.writeCaBundle(migrationInfo.legacyBundle, migrationName, migrationName)
		if err != nil {
			return err
		}
		b.Logger().Info("Migration generated the following ids and set them as defaults",
			"issuer id", anIssuer.ID, "key id", aKey.ID)
		issuerIdentifier = anIssuer.ID
		keyIdentifier = aKey.ID

		// Since we do not have all the mount information available we must schedule
		// the CRL to be rebuilt at a later time.
		b.crlBuilder.requestRebuildIfActiveNode(b)
	}

	if migrationInfo.migrationLog != nil && migrationInfo.migrationLog.MigrationVersion == 1 {
		// We've seen a bundle with migration version 1; this means an
		// earlier version of the code ran which didn't have the fix for
		// correct write order in rebuildIssuersChains(...). Rather than
		// having every user read the migrated active issuer and see if
		// their chains need rebuilding, we'll schedule a one-off chain
		// migration here.
		b.Logger().Info(fmt.Sprintf("%v: performing maintenance rebuild of ca_chains", b.backendUUID))
		if err := sc.rebuildIssuersChains(nil); err != nil {
			return err
		}
	}

	// We always want to write out this log entry as the secondary clusters leverage this path to wake up
	// if they were upgraded prior to the primary cluster's migration occurring.
	err = setLegacyBundleMigrationLog(ctx, s, &legacyBundleMigrationLog{
		Hash:             migrationInfo.legacyBundleHash,
		Created:          time.Now(),
		CreatedIssuer:    issuerIdentifier,
		CreatedKey:       keyIdentifier,
		MigrationVersion: latestMigrationVersion,
	})
	if err != nil {
		return err
	}

	b.Logger().Info(fmt.Sprintf("%v: succeeded in migrating to issuer storage version %v", b.backendUUID, latestMigrationVersion))

	return nil
}

func computeHashOfLegacyBundle(bundle *certutil.CertBundle) (string, error) {
	hasher := sha256.New()
	// Generate an empty hash if the bundle does not exist.
	if bundle != nil {
		// We only hash the main certificate and the certs within the CAChain,
		// assuming that any sort of change that occurred would have influenced one of those two fields.
		if _, err := hasher.Write([]byte(bundle.Certificate)); err != nil {
			return "", err
		}
		for _, cert := range bundle.CAChain {
			if _, err := hasher.Write([]byte(cert)); err != nil {
				return "", err
			}
		}
	}
	return hex.EncodeToString(hasher.Sum(nil)), nil
}
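
// Note that a nil bundle yields the digest of empty input, so mounts that
// never had a legacy CA bundle still produce a stable hash for the
// comparison performed in getMigrationInfo.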

func getLegacyBundleMigrationLog(ctx context.Context, s logical.Storage) (*legacyBundleMigrationLog, error) {
	entry, err := s.Get(ctx, legacyMigrationBundleLogKey)
	if err != nil {
		return nil, err
	}

	if entry == nil {
		return nil, nil
	}

	lbm := &legacyBundleMigrationLog{}
	err = entry.DecodeJSON(lbm)
	if err != nil {
		// If we can't decode our bundle, let's scrap it and assume a blank value;
		// re-running the migration will at most bring back an older certificate/private key.
		return nil, nil
	}
	return lbm, nil
}

func setLegacyBundleMigrationLog(ctx context.Context, s logical.Storage, lbm *legacyBundleMigrationLog) error {
	json, err := logical.StorageEntryJSON(legacyMigrationBundleLogKey, lbm)
	if err != nil {
		return err
	}

	return s.Put(ctx, json)
}

func getLegacyCertBundle(ctx context.Context, s logical.Storage) (*issuerEntry, *certutil.CertBundle, error) {
	entry, err := s.Get(ctx, legacyCertBundlePath)
	if err != nil {
		return nil, nil, err
	}

	if entry == nil {
		return nil, nil, nil
	}

	cb := &certutil.CertBundle{}
	err = entry.DecodeJSON(cb)
	if err != nil {
		return nil, nil, err
	}

	// Fake a storage entry with backwards compatibility in mind.
	issuer := &issuerEntry{
		ID:                   legacyBundleShimID,
		KeyID:                legacyBundleShimKeyID,
		Name:                 "legacy-entry-shim",
		Certificate:          cb.Certificate,
		CAChain:              cb.CAChain,
		SerialNumber:         cb.SerialNumber,
		LeafNotAfterBehavior: certutil.ErrNotAfterBehavior,
	}
	issuer.Usage.ToggleUsage(AllIssuerUsages)

	return issuer, cb, nil
}