From ffa482569355002ee108892c4cf1d9cb1c3cfa17 Mon Sep 17 00:00:00 2001
From: Alexander Scheel
Date: Thu, 3 Nov 2022 11:50:03 -0400
Subject: [PATCH] PKI - Fix order of chain building writes (#17772)

* Ensure correct write ordering in rebuildIssuersChains

When troubleshooting a recent migration failure from 1.10->1.11, it was
noted that some PKI mounts had bad chain construction despite having
valid, chaining issuers. Due to the cluster's leadership thrashing
between nodes, the migration logic was re-executed several times,
partially succeeding each time. While the legacy CA bundle migration
logic was written with this in mind, one shortcoming in the chain
building code led us to truncate the ca_chain: by sorting the list of
issuers after including non-written issuers (with random IDs), these
issuers would occasionally be persisted to storage _prior_ to existing
CAs with modified chains.

The migration code carefully imported the active issuer prior to its
parents. However, due to this bug, there was a chance that, if the
write to the pending parent succeeded but updating the active issuer
didn't, the active issuer's ca_chain field would only contain the
self-reference and not the parent's reference as well. Ultimately, a
workaround of setting and subsequently unsetting a manual chain would
force a chain regeneration.

In this patch, we simply fix the write ordering: because we need to
ensure a stable chain sorting, we leave the sort location in the same
place, but delay writing the provided referenceCert to the last
position. This is because the reference is meant to be the user-facing
action: without transactional write capabilities, other chains may
succeed, but if the last user-facing action fails, the user will
hopefully retry the action.

This will also correct the migration by ensuring the subsequent issuer
import will be attempted again, triggering another chain build and only
persisting this issuer when all other issuers have also been updated.

Signed-off-by: Alexander Scheel

* Remigrate ca_chains to fix any missing issuers

In the previous commit, we identified an issue that would occur on
legacy issuer migration to the new storage format. This is easy enough
to detect for any given mount (by an operator), but automating scanning
and remediating all PKI mounts in large deployments might be difficult.

Write a new storage migration version to regenerate all chains on
upgrade, once.

Signed-off-by: Alexander Scheel

* Add changelog entry

Signed-off-by: Alexander Scheel

* Add issue to PKI considerations documentation

Signed-off-by: Alexander Scheel

* Correct %v -> %w in chain building errs

Signed-off-by: Alexander Scheel

Signed-off-by: Alexander Scheel
---
 builtin/logical/pki/chain_util.go | 44 ++++-
 builtin/logical/pki/storage_migrations.go | 19 +-
 .../logical/pki/storage_migrations_test.go | 183 +++++++++++++++++-
 builtin/logical/pki/storage_test.go | 8 +
 changelog/17772.txt | 3 +
 .../docs/secrets/pki/considerations.mdx | 28 +++
 6 files changed, 278 insertions(+), 7 deletions(-)
 create mode 100644 changelog/17772.txt

diff --git a/builtin/logical/pki/chain_util.go b/builtin/logical/pki/chain_util.go
index 334000a7c..6b7a6a5c7 100644
--- a/builtin/logical/pki/chain_util.go
+++ b/builtin/logical/pki/chain_util.go
@@ -43,7 +43,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt
 // To begin, we fetch all known issuers from disk.
issuers, err := sc.listIssuers() if err != nil { - return fmt.Errorf("unable to list issuers to build chain: %v", err) + return fmt.Errorf("unable to list issuers to build chain: %w", err) } // Fast path: no issuers means we can set the reference cert's value, if @@ -79,6 +79,28 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt // Now call a stable sorting algorithm here. We want to ensure the results // are the same across multiple calls to rebuildIssuersChains with the same // input data. + // + // Note: while we want to ensure referenceCert is written last (because it + // is the user-facing action), we need to balance this with always having + // a stable chain order, regardless of which certificate was chosen as the + // reference cert. (E.g., for a given collection of unchanging certificates, + // if we repeatedly set+unset a manual chain, triggering rebuilds, we should + // always have the same chain after each unset). Thus, delay the write of + // the referenceCert below when persisting -- but keep the sort AFTER the + // referenceCert was added to the list, not before. + // + // (Otherwise, if this is called with one existing issuer and one new + // reference cert, and the reference cert sorts before the existing + // issuer, we will sort this list and have persisted the new issuer + // first, and may fail on the subsequent write to the existing issuer. + // Alternatively, if we don't sort the issuers in this order and there's + // a parallel chain (where cert A is a child of both B and C, with + // C.ID < B.ID and C was passed in as the yet unwritten referenceCert), + // then we'll create a chain with order A -> B -> C on initial write (as + // A and B come from disk) but A -> C -> B on subsequent writes (when all + // certs come from disk). Thus the sort must be done after adding in the + // referenceCert, thus sorting it consistently, but its write must be + // singled out to occur last.) sort.SliceStable(issuers, func(i, j int) bool { return issuers[i] > issuers[j] }) @@ -116,7 +138,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt // Otherwise, fetch it from disk. stored, err = sc.fetchIssuerById(identifier) if err != nil { - return fmt.Errorf("unable to fetch issuer %v to build chain: %v", identifier, err) + return fmt.Errorf("unable to fetch issuer %v to build chain: %w", identifier, err) } } @@ -127,7 +149,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt issuerIdEntryMap[identifier] = stored cert, err := stored.GetCertificate() if err != nil { - return fmt.Errorf("unable to parse issuer %v to certificate to build chain: %v", identifier, err) + return fmt.Errorf("unable to parse issuer %v to certificate to build chain: %w", identifier, err) } issuerIdCertMap[identifier] = cert @@ -417,13 +439,27 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt } // Finally, write all issuers to disk. + // + // See the note above when sorting issuers for why we delay persisting + // the referenceCert, if it was provided. 
for _, issuer := range issuers { entry := issuerIdEntryMap[issuer] + if referenceCert != nil && issuer == referenceCert.ID { + continue + } + err := sc.writeIssuer(entry) if err != nil { pretty := prettyIssuer(issuerIdEntryMap, issuer) - return fmt.Errorf("failed to persist issuer (%v) chain to disk: %v", pretty, err) + return fmt.Errorf("failed to persist issuer (%v) chain to disk: %w", pretty, err) + } + } + if referenceCert != nil { + err := sc.writeIssuer(issuerIdEntryMap[referenceCert.ID]) + if err != nil { + pretty := prettyIssuer(issuerIdEntryMap, referenceCert.ID) + return fmt.Errorf("failed to persist issuer (%v) chain to disk: %w", pretty, err) } } diff --git a/builtin/logical/pki/storage_migrations.go b/builtin/logical/pki/storage_migrations.go index 79ff69712..9104e5c6f 100644 --- a/builtin/logical/pki/storage_migrations.go +++ b/builtin/logical/pki/storage_migrations.go @@ -15,7 +15,7 @@ import ( // in case we find out in the future that something was horribly wrong with the migration, // and we need to perform it again... const ( - latestMigrationVersion = 1 + latestMigrationVersion = 2 legacyBundleShimID = issuerID("legacy-entry-shim-id") legacyBundleShimKeyID = keyID("legacy-entry-shim-key-id") ) @@ -83,13 +83,13 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { var issuerIdentifier issuerID var keyIdentifier keyID + sc := b.makeStorageContext(ctx, s) if migrationInfo.legacyBundle != nil { // Generate a unique name for the migrated items in case things were to be re-migrated again // for some weird reason in the future... migrationName := fmt.Sprintf("current-%d", time.Now().Unix()) b.Logger().Info("performing PKI migration to new keys/issuers layout") - sc := b.makeStorageContext(ctx, s) anIssuer, aKey, err := sc.writeCaBundle(migrationInfo.legacyBundle, migrationName, migrationName) if err != nil { return err @@ -104,6 +104,19 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { b.crlBuilder.requestRebuildIfActiveNode(b) } + if migrationInfo.migrationLog != nil && migrationInfo.migrationLog.MigrationVersion == 1 { + // We've seen a bundle with migration version 1; this means an + // earlier version of the code ran which didn't have the fix for + // correct write order in rebuildIssuersChains(...). Rather than + // having every user read the migrated active issuer and see if + // their chains need rebuilding, we'll schedule a one-off chain + // migration here. + b.Logger().Info(fmt.Sprintf("%v: performing maintenance rebuild of ca_chains", b.backendUUID)) + if err := sc.rebuildIssuersChains(nil); err != nil { + return err + } + } + // We always want to write out this log entry as the secondary clusters leverage this path to wake up // if they were upgraded prior to the primary cluster's migration occurred. 
err = setLegacyBundleMigrationLog(ctx, s, &legacyBundleMigrationLog{ @@ -117,6 +130,8 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { return err } + b.Logger().Info(fmt.Sprintf("%v: succeeded in migrating to issuer storage version %v", b.backendUUID, latestMigrationVersion)) + return nil } diff --git a/builtin/logical/pki/storage_migrations_test.go b/builtin/logical/pki/storage_migrations_test.go index 96da109bb..170f2dfc0 100644 --- a/builtin/logical/pki/storage_migrations_test.go +++ b/builtin/logical/pki/storage_migrations_test.go @@ -162,7 +162,6 @@ func Test_migrateStorageSimpleBundle(t *testing.T) { request := &logical.InitializationRequest{Storage: s} err = b.initialize(ctx, request) require.NoError(t, err) - require.NoError(t, err) issuerIds, err := sc.listIssuers() require.NoError(t, err) @@ -250,6 +249,116 @@ func Test_migrateStorageSimpleBundle(t *testing.T) { require.Equal(t, logEntry.Hash, logEntry3.Hash) } +func TestMigration_OnceChainRebuild(t *testing.T) { + t.Parallel() + ctx := context.Background() + b, s := createBackendWithStorage(t) + sc := b.makeStorageContext(ctx, s) + + // Create a legacy CA bundle that we'll migrate to the new layout. We call + // ToParsedCertBundle just to make sure it works and to populate + // bundle.SerialNumber for us. + bundle := &certutil.CertBundle{ + PrivateKeyType: certutil.RSAPrivateKey, + Certificate: migIntCA, + IssuingCA: migRootCA, + CAChain: []string{migRootCA}, + PrivateKey: migIntPrivKey, + } + _, err := bundle.ToParsedCertBundle() + require.NoError(t, err) + writeLegacyBundle(t, b, s, bundle) + + // Do an initial migration. Ensure we end up at least on version 2. + request := &logical.InitializationRequest{Storage: s} + err = b.initialize(ctx, request) + require.NoError(t, err) + + issuerIds, err := sc.listIssuers() + require.NoError(t, err) + require.Equal(t, 2, len(issuerIds)) + + keyIds, err := sc.listKeys() + require.NoError(t, err) + require.Equal(t, 1, len(keyIds)) + + logEntry, err := getLegacyBundleMigrationLog(ctx, s) + require.NoError(t, err) + require.NotNil(t, logEntry) + require.GreaterOrEqual(t, logEntry.MigrationVersion, 2) + require.GreaterOrEqual(t, latestMigrationVersion, 2) + + // Verify the chain built correctly: current should have a CA chain of + // length two. + // + // Afterwards, we mutate these issuers to only point at themselves and + // write back out. + var rootIssuerId issuerID + var intIssuerId issuerID + for _, issuerId := range issuerIds { + issuer, err := sc.fetchIssuerById(issuerId) + require.NoError(t, err) + require.NotNil(t, issuer) + + if strings.HasPrefix(issuer.Name, "current-") { + require.Equal(t, 2, len(issuer.CAChain)) + require.Equal(t, migIntCA, issuer.CAChain[0]) + require.Equal(t, migRootCA, issuer.CAChain[1]) + intIssuerId = issuerId + + issuer.CAChain = []string{migIntCA} + err = sc.writeIssuer(issuer) + require.NoError(t, err) + } else { + require.Equal(t, 1, len(issuer.CAChain)) + require.Equal(t, migRootCA, issuer.CAChain[0]) + rootIssuerId = issuerId + } + } + + // Reset our migration version back to one, as if this never + // happened... + logEntry.MigrationVersion = 1 + err = setLegacyBundleMigrationLog(ctx, s, logEntry) + require.NoError(t, err) + b.pkiStorageVersion.Store(1) + + // Re-attempt the migration by reinitializing the mount. 
+ err = b.initialize(ctx, request) + require.NoError(t, err) + + newIssuerIds, err := sc.listIssuers() + require.NoError(t, err) + require.Equal(t, 2, len(newIssuerIds)) + require.Equal(t, issuerIds, newIssuerIds) + + newKeyIds, err := sc.listKeys() + require.NoError(t, err) + require.Equal(t, 1, len(newKeyIds)) + require.Equal(t, keyIds, newKeyIds) + + logEntry, err = getLegacyBundleMigrationLog(ctx, s) + require.NoError(t, err) + require.NotNil(t, logEntry) + require.Equal(t, logEntry.MigrationVersion, latestMigrationVersion) + + // Ensure the chains are correct on the intermediate. By using the + // issuerId saved above, this ensures we didn't change any issuerIds, + // we merely updated the existing issuers. + intIssuer, err := sc.fetchIssuerById(intIssuerId) + require.NoError(t, err) + require.NotNil(t, intIssuer) + require.Equal(t, 2, len(intIssuer.CAChain)) + require.Equal(t, migIntCA, intIssuer.CAChain[0]) + require.Equal(t, migRootCA, intIssuer.CAChain[1]) + + rootIssuer, err := sc.fetchIssuerById(rootIssuerId) + require.NoError(t, err) + require.NotNil(t, rootIssuer) + require.Equal(t, 1, len(rootIssuer.CAChain)) + require.Equal(t, migRootCA, rootIssuer.CAChain[0]) +} + func TestExpectedOpsWork_PreMigration(t *testing.T) { t.Parallel() ctx := context.Background() @@ -496,3 +605,75 @@ func requireFailInMigration(t *testing.T, b *backend, s logical.Storage, operati require.Contains(t, resp.Error().Error(), "migration has completed", "error message did not contain migration test for op:%s path:%s resp: %#v", operation, path, resp) } + +// Keys to simulate an intermediate CA mount with also-imported root (parent). +const ( + migIntPrivKey = `-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAqu88Jcct/EyT8gDF+jdWuAwFplvanQ7KXAO5at58G6Y39UUz +fwnMS3P3VRBUoV5BDX+13wI2ldskbTKITsl6IXBPXUz0sKrdEKzXRVY4D6P2JR7W +YO1IUytfTgR+3F4sotFNQB++3ivT66AYLW7lOkoa+5lxsPM/oJ82DOlD2uGtDVTU +gQy1zugMBgPDlj+8tB562J9MTIdcKe9JpYrN0eO+aHzhbfvaSpScU4aZBgkS0kDv +8G4FxVfrBSDeD/JjCWaC48rLdgei1YrY0NFuw/8p/nPfA9vf2AtHMsWZRSwukQfq +I5HhQu+0OHQy3NWqXaBPzJNu3HnpKLykPHW7sQIDAQABAoIBAHNJy/2G66MhWx98 +Ggt7S4fyw9TCWx5XHXEWKfbEfFyBrXhF5kemqh2x5319+DamRaX/HwF8kqhcF6N2 +06ygAzmOcFjzUI3fkB5xFPh1AHa8FYZP2DOjloZR2IPcUFv9QInINRwszSU31kUz +w1rRUtYPqUdM5Pt99Mo219O5eMSlGtPKXm09uDAR8ZPuUx4jwGw90pSgeRB1Bg7X +Dt3YXx3X+OOs3Hbir1VDLSqCuy825l6Kn79h3eB8LAi+FUwCBvnTqyOEWyH2XjgP +z+tbz7lwnhGeKtxUl6Jb3m3SHtXpylot/4fwPisRV/9vaEDhVjKTmySH1WM+TRNR +CQLCJekCgYEA3b67DBhAYsFFdUd/4xh4QhHBanOcepV1CwaRln+UUjw1618ZEsTS +DKb9IS72C+ukUusGhQqxjFJlhOdXeMXpEbnEUY3PlREevWwm3bVAxtoAVRcmkQyK +PM4Oj9ODi2z8Cds0NvEXdX69uVutcbvm/JRZr/dsERWcLsfwdV/QqYcCgYEAxVce +d4ylsqORLm0/gcLnEyB9zhEPwmiJe1Yj5sH7LhGZ6JtLCqbOJO4jXmIzCrkbGyuf +BA/U7klc6jSprkBMgYhgOIuaULuFJvtKzJUzoATGFqX4r8WJm2ZycXgooAwZq6SZ +ySXOuQe9V7hlpI0fJfNhw+/HIjivL1jrnjBoXwcCgYEAtTv6LLx1g0Frv5scj0Ok +pntUlei/8ADPlJ9dxp+nXj8P4rvrBkgPVX/2S3TSbJO/znWA8qP20TVW+/UIrRE0 +mOQ37F/3VWKUuUT3zyUhOGVc+C7fupWBNolDpZG+ZepBZNzgJDeQcNuRvTmM3PQy +qiWl2AhlLuF2sVWA1q3lIWkCgYEAnuHWgNA3dE1nDWceE351hxvIzklEU/TQhAHF +o/uYHO5E6VdmoqvMG0W0KkCL8d046rZDMAUDHdrpOROvbcENF9lSBxS26LshqFH4 +ViDmULanOgLk57f2Y6ynBZ6Frt4vKNe8jYuoFacale67vzFz251JoHSD8pSKz2cb +ROCal68CgYA51hKqvki4r5rmS7W/Yvc3x3Wc0wpDEHTgLMoH+EV7AffJ8dy0/+po +AHK0nnRU63++1JmhQczBR0yTI6PUyeegEBk/d5CgFlY7UJQMTFPsMsiuM0Xw5nAv +KMPykK01D28UAkUxhwF7CqFrwwEv9GislgjewbdF5Za176+EuMEwIw== +-----END RSA PRIVATE KEY----- +` + migIntCA = `-----BEGIN CERTIFICATE----- +MIIDHTCCAgWgAwIBAgIUfxlNBmrI7jsgH2Sdle1nVTqn5YQwDQYJKoZIhvcNAQEL 
+BQAwEjEQMA4GA1UEAxMHUm9vdCBYMTAeFw0yMjExMDIxMjI2MjhaFw0yMjEyMDQx +MjI2NThaMBoxGDAWBgNVBAMTD0ludGVybWVkaWF0ZSBSMTCCASIwDQYJKoZIhvcN +AQEBBQADggEPADCCAQoCggEBAKrvPCXHLfxMk/IAxfo3VrgMBaZb2p0OylwDuWre +fBumN/VFM38JzEtz91UQVKFeQQ1/td8CNpXbJG0yiE7JeiFwT11M9LCq3RCs10VW +OA+j9iUe1mDtSFMrX04EftxeLKLRTUAfvt4r0+ugGC1u5TpKGvuZcbDzP6CfNgzp +Q9rhrQ1U1IEMtc7oDAYDw5Y/vLQeetifTEyHXCnvSaWKzdHjvmh84W372kqUnFOG +mQYJEtJA7/BuBcVX6wUg3g/yYwlmguPKy3YHotWK2NDRbsP/Kf5z3wPb39gLRzLF +mUUsLpEH6iOR4ULvtDh0MtzVql2gT8yTbtx56Si8pDx1u7ECAwEAAaNjMGEwDgYD +VR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFusWj3piAiY +CR7tszR6uNYSMLe2MB8GA1UdIwQYMBaAFMNRNkLozstIhNhXCefi+WnaQApbMA0G +CSqGSIb3DQEBCwUAA4IBAQCmH852E/pDGBhf2VI1JAPZy9VYaRkKoqn4+5R1Gnoq +b90zhdCGueIm/usC1wAa0OOn7+xdQXFNfeI8UUB9w10q0QnM/A/G2v8UkdlLPPQP +zPjIYLalOOIOHf8hU2O5lwj0IA4JwjwDQ4xj69eX/N+x2LEI7SHyVVUZWAx0Y67a +QdyubpIJZlW/PI7kMwGyTx3tdkZxk1nTNtf/0nKvNuXKKcVzBCEMfvXyx4LFEM+U +nc2vdWN7PAoXcjUbxD3ZNGinr7mSBpQg82+nur/8yuSwu6iHomnfGxjUsEHic2GC +ja9siTbR+ONvVb4xUjugN/XmMSSaZnxig2vM9xcV8OMG +-----END CERTIFICATE----- +` + migRootCA = `-----BEGIN CERTIFICATE----- +MIIDFTCCAf2gAwIBAgIURDTnXp8u78jWMe770Jj6Ac1paxkwDQYJKoZIhvcNAQEL +BQAwEjEQMA4GA1UEAxMHUm9vdCBYMTAeFw0yMjExMDIxMjI0NTVaFw0yMjEyMDQx +MjI1MjRaMBIxEDAOBgNVBAMTB1Jvb3QgWDEwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC/+dh/o1qKTOua/OkHRMIvHiyBxjjoqrLqFSBYhjYKs+alA0qS +lLVzNqIKU8jm3fT73orx7yk/6acWaEYv/6owMaUn51xwS3gQhTHdFR/fLJwXnu2O +PZNqAs6tjAM3Q08aqR0qfxnjDvcgO7TOWSyOvVT2cTRK+uKYzxJEY52BDMUbp+iC +WJdXca9UwKRzi2wFqGliDycYsBBt/tr8tHSbTSZ5Qx6UpFrKpjZn+sT5KhKUlsdd +BYFmRegc0wXq4/kRjum0oEUigUMlHADIEhRasyXPEKa19sGP8nAZfo/hNOusGhj7 +z7UPA0Cbe2uclpYPxsKgvcqQmgKugqKLL305AgMBAAGjYzBhMA4GA1UdDwEB/wQE +AwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTDUTZC6M7LSITYVwnn4vlp +2kAKWzAfBgNVHSMEGDAWgBTDUTZC6M7LSITYVwnn4vlp2kAKWzANBgkqhkiG9w0B +AQsFAAOCAQEAu7qdM1Li6V6iDCPpLg5zZReRtcxhUdwb5Xn4sDa8GJCy35f1voew +n0TQgM3Uph5x/djCR/Sj91MyAJ1/Q1PQQTyKGyUjSHvkcOBg628IAnLthn8Ua1fL +oQC/F/mlT1Yv+/W8eNPtD453/P0z8E0xMT5K3kpEDW/6K9RdHZlDJMW/z3UJ+4LN +6ONjIBmgffmLz9sVMpgCFyL7+w3W01bGP7w5AfKj2duoVG/Ekf2yUwmm6r9NgTQ1 +oke0ShbZuMocwO8anq7k0R42FoluH3ipv9Qzzhsy+KdK5/fW5oqy1tKFaZsc67Q6 +0UmD9DiDpCtn2Wod3nwxn0zW5HvDAWuDwg== +-----END CERTIFICATE----- +` +) diff --git a/builtin/logical/pki/storage_test.go b/builtin/logical/pki/storage_test.go index 3ff2de6d5..2f9f99c28 100644 --- a/builtin/logical/pki/storage_test.go +++ b/builtin/logical/pki/storage_test.go @@ -258,3 +258,11 @@ func genCertBundle(t *testing.T, b *backend, s logical.Storage) *certutil.CertBu require.NoError(t, err) return certBundle } + +func writeLegacyBundle(t *testing.T, b *backend, s logical.Storage, bundle *certutil.CertBundle) { + entry, err := logical.StorageEntryJSON(legacyCertBundlePath, bundle) + require.NoError(t, err) + + err = s.Put(context.Background(), entry) + require.NoError(t, err) +} diff --git a/changelog/17772.txt b/changelog/17772.txt new file mode 100644 index 000000000..6866d3f59 --- /dev/null +++ b/changelog/17772.txt @@ -0,0 +1,3 @@ +```release-note:bug +secret/pki: fix bug with initial legacy bundle migration (from < 1.11 into 1.11+) and missing issuers from ca_chain +``` diff --git a/website/content/docs/secrets/pki/considerations.mdx b/website/content/docs/secrets/pki/considerations.mdx index 8c3f7d15f..035942397 100644 --- a/website/content/docs/secrets/pki/considerations.mdx +++ b/website/content/docs/secrets/pki/considerations.mdx @@ -37,6 +37,7 @@ generating the CA to use with this secrets engine. 
- [Cluster Scalability](#cluster-scalability) - [PSS Support](#pss-support) - [Issuer Subjects and CRLs](#issuer-subjects-and-crls) + - [Issuer Storage Migration Issues](#issuer-storage-migration-issues) ## Be Careful with Root CAs @@ -616,6 +617,33 @@ correctly throughout all parts of the PKI. In particular, CRLs embed a track revocation, but note that performance characteristics are different between OCSP and CRLs. +## Issuer Storage Migration Issues + +When Vault migrates to the new multi-issuer storage layout on releases prior +to 1.11.6, 1.12.2, and 1.13, and storage write errors occur during the mount +initialization and storage migration process, the default issuer _may_ not +have the correct `ca_chain` value and may only have the self-reference. These +write errors most commonly manifest in logs as a message like +`failed to persist issuer ... chain to disk: ` and indicate that Vault +was not stable at the time of migration. Note that this only occurs when more +than one issuer exists within the mount (such as an intermediate with root). + +To fix this manually (until a new version of Vault automatically rebuilds the +issuer chain), a rebuild of the chains can be performed: + +``` +curl -X PATCH -H "Content-Type: application/merge-patch+json" -H "X-Vault-Request: true" -H "X-Vault-Token: $(vault print token)" -d '{"manual_chain":"self"}' https://.../issuer/default +curl -X PATCH -H "Content-Type: application/merge-patch+json" -H "X-Vault-Request: true" -H "X-Vault-Token: $(vault print token)" -d '{"manual_chain":""}' https://.../issuer/default +``` + +This temporarily sets the manual chain on the default issuer to a self-chain +only, before reverting it back to automatic chain building. This triggers a +refresh of the `ca_chain` field on the issuer, and can be verified with: + +``` +vault read pki/issuer/default +``` + ## Tutorial Refer to the [Build Your Own Certificate Authority (CA)](https://learn.hashicorp.com/vault/secrets-management/sm-pki-engine)
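
For readers who want the shape of the fix without walking the full diff, here is a minimal, self-contained sketch of the write ordering the first commit introduces in rebuildIssuersChains. The issuer struct, persistAll, and the persist callback below are simplified stand-ins invented for illustration; the real logic lives in builtin/logical/pki/chain_util.go and operates on Vault's issuerEntry and storage context.

```go
// Sketch of the "write the reference issuer last" ordering described in the
// commit message above. Types and the persist callback are hypothetical
// stand-ins, not the actual Vault PKI types.
package main

import "fmt"

type issuer struct {
	ID      string
	CAChain []string
}

// persistAll writes every issuer's rebuilt chain, deliberately saving the
// user-facing reference issuer for last: if an earlier write fails, the
// reference issuer remains unwritten, so the caller's retry triggers another
// chain build instead of leaving a silently truncated ca_chain.
func persistAll(issuers []*issuer, reference *issuer, persist func(*issuer) error) error {
	for _, iss := range issuers {
		if reference != nil && iss.ID == reference.ID {
			continue // delay the reference issuer's write
		}
		if err := persist(iss); err != nil {
			return fmt.Errorf("failed to persist issuer %v chain: %w", iss.ID, err)
		}
	}
	if reference != nil {
		if err := persist(reference); err != nil {
			return fmt.Errorf("failed to persist issuer %v chain: %w", reference.ID, err)
		}
	}
	return nil
}

func main() {
	root := &issuer{ID: "root", CAChain: []string{"root-pem"}}
	intermediate := &issuer{ID: "int", CAChain: []string{"int-pem", "root-pem"}}

	// The intermediate is the user-facing (reference) issuer here, so it is
	// persisted after its parent even though it appears first in the slice.
	store := map[string][]string{}
	err := persistAll([]*issuer{intermediate, root}, intermediate, func(i *issuer) error {
		store[i.ID] = i.CAChain
		return nil
	})
	fmt.Println(err, store)
}
```

The trade-off is the one the commit message calls out: without transactional storage writes, deferring the user-facing issuer's write to the end means a partial failure leaves behind an action the user will naturally retry, rather than a quietly truncated chain.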