diff --git a/builtin/logical/pki/chain_util.go b/builtin/logical/pki/chain_util.go index 334000a7c..6b7a6a5c7 100644 --- a/builtin/logical/pki/chain_util.go +++ b/builtin/logical/pki/chain_util.go @@ -43,7 +43,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt // To begin, we fetch all known issuers from disk. issuers, err := sc.listIssuers() if err != nil { - return fmt.Errorf("unable to list issuers to build chain: %v", err) + return fmt.Errorf("unable to list issuers to build chain: %w", err) } // Fast path: no issuers means we can set the reference cert's value, if @@ -79,6 +79,28 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt // Now call a stable sorting algorithm here. We want to ensure the results // are the same across multiple calls to rebuildIssuersChains with the same // input data. + // + // Note: while we want to ensure referenceCert is written last (because it + // is the user-facing action), we need to balance this with always having + // a stable chain order, regardless of which certificate was chosen as the + // reference cert. (E.g., for a given collection of unchanging certificates, + // if we repeatedly set+unset a manual chain, triggering rebuilds, we should + // always have the same chain after each unset). Thus, delay the write of + // the referenceCert below when persisting -- but keep the sort AFTER the + // referenceCert was added to the list, not before. + // + // (Otherwise, if this is called with one existing issuer and one new + // reference cert, and the reference cert sorts before the existing + // issuer, we will sort this list and have persisted the new issuer + // first, and may fail on the subsequent write to the existing issuer. + // Alternatively, if we don't sort the issuers in this order and there's + // a parallel chain (where cert A is a child of both B and C, with + // C.ID < B.ID and C was passed in as the yet unwritten referenceCert), + // then we'll create a chain with order A -> B -> C on initial write (as + // A and B come from disk) but A -> C -> B on subsequent writes (when all + // certs come from disk). Thus the sort must be done after adding in the + // referenceCert, thus sorting it consistently, but its write must be + // singled out to occur last.) sort.SliceStable(issuers, func(i, j int) bool { return issuers[i] > issuers[j] }) @@ -116,7 +138,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt // Otherwise, fetch it from disk. stored, err = sc.fetchIssuerById(identifier) if err != nil { - return fmt.Errorf("unable to fetch issuer %v to build chain: %v", identifier, err) + return fmt.Errorf("unable to fetch issuer %v to build chain: %w", identifier, err) } } @@ -127,7 +149,7 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt issuerIdEntryMap[identifier] = stored cert, err := stored.GetCertificate() if err != nil { - return fmt.Errorf("unable to parse issuer %v to certificate to build chain: %v", identifier, err) + return fmt.Errorf("unable to parse issuer %v to certificate to build chain: %w", identifier, err) } issuerIdCertMap[identifier] = cert @@ -417,13 +439,27 @@ func (sc *storageContext) rebuildIssuersChains(referenceCert *issuerEntry /* opt } // Finally, write all issuers to disk. + // + // See the note above when sorting issuers for why we delay persisting + // the referenceCert, if it was provided. for _, issuer := range issuers { entry := issuerIdEntryMap[issuer] + if referenceCert != nil && issuer == referenceCert.ID { + continue + } + err := sc.writeIssuer(entry) if err != nil { pretty := prettyIssuer(issuerIdEntryMap, issuer) - return fmt.Errorf("failed to persist issuer (%v) chain to disk: %v", pretty, err) + return fmt.Errorf("failed to persist issuer (%v) chain to disk: %w", pretty, err) + } + } + if referenceCert != nil { + err := sc.writeIssuer(issuerIdEntryMap[referenceCert.ID]) + if err != nil { + pretty := prettyIssuer(issuerIdEntryMap, referenceCert.ID) + return fmt.Errorf("failed to persist issuer (%v) chain to disk: %w", pretty, err) } } diff --git a/builtin/logical/pki/storage_migrations.go b/builtin/logical/pki/storage_migrations.go index 79ff69712..9104e5c6f 100644 --- a/builtin/logical/pki/storage_migrations.go +++ b/builtin/logical/pki/storage_migrations.go @@ -15,7 +15,7 @@ import ( // in case we find out in the future that something was horribly wrong with the migration, // and we need to perform it again... const ( - latestMigrationVersion = 1 + latestMigrationVersion = 2 legacyBundleShimID = issuerID("legacy-entry-shim-id") legacyBundleShimKeyID = keyID("legacy-entry-shim-key-id") ) @@ -83,13 +83,13 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { var issuerIdentifier issuerID var keyIdentifier keyID + sc := b.makeStorageContext(ctx, s) if migrationInfo.legacyBundle != nil { // Generate a unique name for the migrated items in case things were to be re-migrated again // for some weird reason in the future... migrationName := fmt.Sprintf("current-%d", time.Now().Unix()) b.Logger().Info("performing PKI migration to new keys/issuers layout") - sc := b.makeStorageContext(ctx, s) anIssuer, aKey, err := sc.writeCaBundle(migrationInfo.legacyBundle, migrationName, migrationName) if err != nil { return err @@ -104,6 +104,19 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { b.crlBuilder.requestRebuildIfActiveNode(b) } + if migrationInfo.migrationLog != nil && migrationInfo.migrationLog.MigrationVersion == 1 { + // We've seen a bundle with migration version 1; this means an + // earlier version of the code ran which didn't have the fix for + // correct write order in rebuildIssuersChains(...). Rather than + // having every user read the migrated active issuer and see if + // their chains need rebuilding, we'll schedule a one-off chain + // migration here. + b.Logger().Info(fmt.Sprintf("%v: performing maintenance rebuild of ca_chains", b.backendUUID)) + if err := sc.rebuildIssuersChains(nil); err != nil { + return err + } + } + // We always want to write out this log entry as the secondary clusters leverage this path to wake up // if they were upgraded prior to the primary cluster's migration occurred. err = setLegacyBundleMigrationLog(ctx, s, &legacyBundleMigrationLog{ @@ -117,6 +130,8 @@ func migrateStorage(ctx context.Context, b *backend, s logical.Storage) error { return err } + b.Logger().Info(fmt.Sprintf("%v: succeeded in migrating to issuer storage version %v", b.backendUUID, latestMigrationVersion)) + return nil } diff --git a/builtin/logical/pki/storage_migrations_test.go b/builtin/logical/pki/storage_migrations_test.go index 96da109bb..170f2dfc0 100644 --- a/builtin/logical/pki/storage_migrations_test.go +++ b/builtin/logical/pki/storage_migrations_test.go @@ -162,7 +162,6 @@ func Test_migrateStorageSimpleBundle(t *testing.T) { request := &logical.InitializationRequest{Storage: s} err = b.initialize(ctx, request) require.NoError(t, err) - require.NoError(t, err) issuerIds, err := sc.listIssuers() require.NoError(t, err) @@ -250,6 +249,116 @@ func Test_migrateStorageSimpleBundle(t *testing.T) { require.Equal(t, logEntry.Hash, logEntry3.Hash) } +func TestMigration_OnceChainRebuild(t *testing.T) { + t.Parallel() + ctx := context.Background() + b, s := createBackendWithStorage(t) + sc := b.makeStorageContext(ctx, s) + + // Create a legacy CA bundle that we'll migrate to the new layout. We call + // ToParsedCertBundle just to make sure it works and to populate + // bundle.SerialNumber for us. + bundle := &certutil.CertBundle{ + PrivateKeyType: certutil.RSAPrivateKey, + Certificate: migIntCA, + IssuingCA: migRootCA, + CAChain: []string{migRootCA}, + PrivateKey: migIntPrivKey, + } + _, err := bundle.ToParsedCertBundle() + require.NoError(t, err) + writeLegacyBundle(t, b, s, bundle) + + // Do an initial migration. Ensure we end up at least on version 2. + request := &logical.InitializationRequest{Storage: s} + err = b.initialize(ctx, request) + require.NoError(t, err) + + issuerIds, err := sc.listIssuers() + require.NoError(t, err) + require.Equal(t, 2, len(issuerIds)) + + keyIds, err := sc.listKeys() + require.NoError(t, err) + require.Equal(t, 1, len(keyIds)) + + logEntry, err := getLegacyBundleMigrationLog(ctx, s) + require.NoError(t, err) + require.NotNil(t, logEntry) + require.GreaterOrEqual(t, logEntry.MigrationVersion, 2) + require.GreaterOrEqual(t, latestMigrationVersion, 2) + + // Verify the chain built correctly: current should have a CA chain of + // length two. + // + // Afterwards, we mutate these issuers to only point at themselves and + // write back out. + var rootIssuerId issuerID + var intIssuerId issuerID + for _, issuerId := range issuerIds { + issuer, err := sc.fetchIssuerById(issuerId) + require.NoError(t, err) + require.NotNil(t, issuer) + + if strings.HasPrefix(issuer.Name, "current-") { + require.Equal(t, 2, len(issuer.CAChain)) + require.Equal(t, migIntCA, issuer.CAChain[0]) + require.Equal(t, migRootCA, issuer.CAChain[1]) + intIssuerId = issuerId + + issuer.CAChain = []string{migIntCA} + err = sc.writeIssuer(issuer) + require.NoError(t, err) + } else { + require.Equal(t, 1, len(issuer.CAChain)) + require.Equal(t, migRootCA, issuer.CAChain[0]) + rootIssuerId = issuerId + } + } + + // Reset our migration version back to one, as if this never + // happened... + logEntry.MigrationVersion = 1 + err = setLegacyBundleMigrationLog(ctx, s, logEntry) + require.NoError(t, err) + b.pkiStorageVersion.Store(1) + + // Re-attempt the migration by reinitializing the mount. + err = b.initialize(ctx, request) + require.NoError(t, err) + + newIssuerIds, err := sc.listIssuers() + require.NoError(t, err) + require.Equal(t, 2, len(newIssuerIds)) + require.Equal(t, issuerIds, newIssuerIds) + + newKeyIds, err := sc.listKeys() + require.NoError(t, err) + require.Equal(t, 1, len(newKeyIds)) + require.Equal(t, keyIds, newKeyIds) + + logEntry, err = getLegacyBundleMigrationLog(ctx, s) + require.NoError(t, err) + require.NotNil(t, logEntry) + require.Equal(t, logEntry.MigrationVersion, latestMigrationVersion) + + // Ensure the chains are correct on the intermediate. By using the + // issuerId saved above, this ensures we didn't change any issuerIds, + // we merely updated the existing issuers. + intIssuer, err := sc.fetchIssuerById(intIssuerId) + require.NoError(t, err) + require.NotNil(t, intIssuer) + require.Equal(t, 2, len(intIssuer.CAChain)) + require.Equal(t, migIntCA, intIssuer.CAChain[0]) + require.Equal(t, migRootCA, intIssuer.CAChain[1]) + + rootIssuer, err := sc.fetchIssuerById(rootIssuerId) + require.NoError(t, err) + require.NotNil(t, rootIssuer) + require.Equal(t, 1, len(rootIssuer.CAChain)) + require.Equal(t, migRootCA, rootIssuer.CAChain[0]) +} + func TestExpectedOpsWork_PreMigration(t *testing.T) { t.Parallel() ctx := context.Background() @@ -496,3 +605,75 @@ func requireFailInMigration(t *testing.T, b *backend, s logical.Storage, operati require.Contains(t, resp.Error().Error(), "migration has completed", "error message did not contain migration test for op:%s path:%s resp: %#v", operation, path, resp) } + +// Keys to simulate an intermediate CA mount with also-imported root (parent). +const ( + migIntPrivKey = `-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAqu88Jcct/EyT8gDF+jdWuAwFplvanQ7KXAO5at58G6Y39UUz +fwnMS3P3VRBUoV5BDX+13wI2ldskbTKITsl6IXBPXUz0sKrdEKzXRVY4D6P2JR7W +YO1IUytfTgR+3F4sotFNQB++3ivT66AYLW7lOkoa+5lxsPM/oJ82DOlD2uGtDVTU +gQy1zugMBgPDlj+8tB562J9MTIdcKe9JpYrN0eO+aHzhbfvaSpScU4aZBgkS0kDv +8G4FxVfrBSDeD/JjCWaC48rLdgei1YrY0NFuw/8p/nPfA9vf2AtHMsWZRSwukQfq +I5HhQu+0OHQy3NWqXaBPzJNu3HnpKLykPHW7sQIDAQABAoIBAHNJy/2G66MhWx98 +Ggt7S4fyw9TCWx5XHXEWKfbEfFyBrXhF5kemqh2x5319+DamRaX/HwF8kqhcF6N2 +06ygAzmOcFjzUI3fkB5xFPh1AHa8FYZP2DOjloZR2IPcUFv9QInINRwszSU31kUz +w1rRUtYPqUdM5Pt99Mo219O5eMSlGtPKXm09uDAR8ZPuUx4jwGw90pSgeRB1Bg7X +Dt3YXx3X+OOs3Hbir1VDLSqCuy825l6Kn79h3eB8LAi+FUwCBvnTqyOEWyH2XjgP +z+tbz7lwnhGeKtxUl6Jb3m3SHtXpylot/4fwPisRV/9vaEDhVjKTmySH1WM+TRNR +CQLCJekCgYEA3b67DBhAYsFFdUd/4xh4QhHBanOcepV1CwaRln+UUjw1618ZEsTS +DKb9IS72C+ukUusGhQqxjFJlhOdXeMXpEbnEUY3PlREevWwm3bVAxtoAVRcmkQyK +PM4Oj9ODi2z8Cds0NvEXdX69uVutcbvm/JRZr/dsERWcLsfwdV/QqYcCgYEAxVce +d4ylsqORLm0/gcLnEyB9zhEPwmiJe1Yj5sH7LhGZ6JtLCqbOJO4jXmIzCrkbGyuf +BA/U7klc6jSprkBMgYhgOIuaULuFJvtKzJUzoATGFqX4r8WJm2ZycXgooAwZq6SZ +ySXOuQe9V7hlpI0fJfNhw+/HIjivL1jrnjBoXwcCgYEAtTv6LLx1g0Frv5scj0Ok +pntUlei/8ADPlJ9dxp+nXj8P4rvrBkgPVX/2S3TSbJO/znWA8qP20TVW+/UIrRE0 +mOQ37F/3VWKUuUT3zyUhOGVc+C7fupWBNolDpZG+ZepBZNzgJDeQcNuRvTmM3PQy +qiWl2AhlLuF2sVWA1q3lIWkCgYEAnuHWgNA3dE1nDWceE351hxvIzklEU/TQhAHF +o/uYHO5E6VdmoqvMG0W0KkCL8d046rZDMAUDHdrpOROvbcENF9lSBxS26LshqFH4 +ViDmULanOgLk57f2Y6ynBZ6Frt4vKNe8jYuoFacale67vzFz251JoHSD8pSKz2cb +ROCal68CgYA51hKqvki4r5rmS7W/Yvc3x3Wc0wpDEHTgLMoH+EV7AffJ8dy0/+po +AHK0nnRU63++1JmhQczBR0yTI6PUyeegEBk/d5CgFlY7UJQMTFPsMsiuM0Xw5nAv +KMPykK01D28UAkUxhwF7CqFrwwEv9GislgjewbdF5Za176+EuMEwIw== +-----END RSA PRIVATE KEY----- +` + migIntCA = `-----BEGIN CERTIFICATE----- +MIIDHTCCAgWgAwIBAgIUfxlNBmrI7jsgH2Sdle1nVTqn5YQwDQYJKoZIhvcNAQEL +BQAwEjEQMA4GA1UEAxMHUm9vdCBYMTAeFw0yMjExMDIxMjI2MjhaFw0yMjEyMDQx +MjI2NThaMBoxGDAWBgNVBAMTD0ludGVybWVkaWF0ZSBSMTCCASIwDQYJKoZIhvcN +AQEBBQADggEPADCCAQoCggEBAKrvPCXHLfxMk/IAxfo3VrgMBaZb2p0OylwDuWre +fBumN/VFM38JzEtz91UQVKFeQQ1/td8CNpXbJG0yiE7JeiFwT11M9LCq3RCs10VW +OA+j9iUe1mDtSFMrX04EftxeLKLRTUAfvt4r0+ugGC1u5TpKGvuZcbDzP6CfNgzp +Q9rhrQ1U1IEMtc7oDAYDw5Y/vLQeetifTEyHXCnvSaWKzdHjvmh84W372kqUnFOG +mQYJEtJA7/BuBcVX6wUg3g/yYwlmguPKy3YHotWK2NDRbsP/Kf5z3wPb39gLRzLF +mUUsLpEH6iOR4ULvtDh0MtzVql2gT8yTbtx56Si8pDx1u7ECAwEAAaNjMGEwDgYD +VR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFusWj3piAiY +CR7tszR6uNYSMLe2MB8GA1UdIwQYMBaAFMNRNkLozstIhNhXCefi+WnaQApbMA0G +CSqGSIb3DQEBCwUAA4IBAQCmH852E/pDGBhf2VI1JAPZy9VYaRkKoqn4+5R1Gnoq +b90zhdCGueIm/usC1wAa0OOn7+xdQXFNfeI8UUB9w10q0QnM/A/G2v8UkdlLPPQP +zPjIYLalOOIOHf8hU2O5lwj0IA4JwjwDQ4xj69eX/N+x2LEI7SHyVVUZWAx0Y67a +QdyubpIJZlW/PI7kMwGyTx3tdkZxk1nTNtf/0nKvNuXKKcVzBCEMfvXyx4LFEM+U +nc2vdWN7PAoXcjUbxD3ZNGinr7mSBpQg82+nur/8yuSwu6iHomnfGxjUsEHic2GC +ja9siTbR+ONvVb4xUjugN/XmMSSaZnxig2vM9xcV8OMG +-----END CERTIFICATE----- +` + migRootCA = `-----BEGIN CERTIFICATE----- +MIIDFTCCAf2gAwIBAgIURDTnXp8u78jWMe770Jj6Ac1paxkwDQYJKoZIhvcNAQEL +BQAwEjEQMA4GA1UEAxMHUm9vdCBYMTAeFw0yMjExMDIxMjI0NTVaFw0yMjEyMDQx +MjI1MjRaMBIxEDAOBgNVBAMTB1Jvb3QgWDEwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC/+dh/o1qKTOua/OkHRMIvHiyBxjjoqrLqFSBYhjYKs+alA0qS +lLVzNqIKU8jm3fT73orx7yk/6acWaEYv/6owMaUn51xwS3gQhTHdFR/fLJwXnu2O +PZNqAs6tjAM3Q08aqR0qfxnjDvcgO7TOWSyOvVT2cTRK+uKYzxJEY52BDMUbp+iC +WJdXca9UwKRzi2wFqGliDycYsBBt/tr8tHSbTSZ5Qx6UpFrKpjZn+sT5KhKUlsdd +BYFmRegc0wXq4/kRjum0oEUigUMlHADIEhRasyXPEKa19sGP8nAZfo/hNOusGhj7 +z7UPA0Cbe2uclpYPxsKgvcqQmgKugqKLL305AgMBAAGjYzBhMA4GA1UdDwEB/wQE +AwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTDUTZC6M7LSITYVwnn4vlp +2kAKWzAfBgNVHSMEGDAWgBTDUTZC6M7LSITYVwnn4vlp2kAKWzANBgkqhkiG9w0B +AQsFAAOCAQEAu7qdM1Li6V6iDCPpLg5zZReRtcxhUdwb5Xn4sDa8GJCy35f1voew +n0TQgM3Uph5x/djCR/Sj91MyAJ1/Q1PQQTyKGyUjSHvkcOBg628IAnLthn8Ua1fL +oQC/F/mlT1Yv+/W8eNPtD453/P0z8E0xMT5K3kpEDW/6K9RdHZlDJMW/z3UJ+4LN +6ONjIBmgffmLz9sVMpgCFyL7+w3W01bGP7w5AfKj2duoVG/Ekf2yUwmm6r9NgTQ1 +oke0ShbZuMocwO8anq7k0R42FoluH3ipv9Qzzhsy+KdK5/fW5oqy1tKFaZsc67Q6 +0UmD9DiDpCtn2Wod3nwxn0zW5HvDAWuDwg== +-----END CERTIFICATE----- +` +) diff --git a/builtin/logical/pki/storage_test.go b/builtin/logical/pki/storage_test.go index 3ff2de6d5..2f9f99c28 100644 --- a/builtin/logical/pki/storage_test.go +++ b/builtin/logical/pki/storage_test.go @@ -258,3 +258,11 @@ func genCertBundle(t *testing.T, b *backend, s logical.Storage) *certutil.CertBu require.NoError(t, err) return certBundle } + +func writeLegacyBundle(t *testing.T, b *backend, s logical.Storage, bundle *certutil.CertBundle) { + entry, err := logical.StorageEntryJSON(legacyCertBundlePath, bundle) + require.NoError(t, err) + + err = s.Put(context.Background(), entry) + require.NoError(t, err) +} diff --git a/changelog/17772.txt b/changelog/17772.txt new file mode 100644 index 000000000..6866d3f59 --- /dev/null +++ b/changelog/17772.txt @@ -0,0 +1,3 @@ +```release-note:bug +secret/pki: fix bug with initial legacy bundle migration (from < 1.11 into 1.11+) and missing issuers from ca_chain +``` diff --git a/website/content/docs/secrets/pki/considerations.mdx b/website/content/docs/secrets/pki/considerations.mdx index 8c3f7d15f..035942397 100644 --- a/website/content/docs/secrets/pki/considerations.mdx +++ b/website/content/docs/secrets/pki/considerations.mdx @@ -37,6 +37,7 @@ generating the CA to use with this secrets engine. - [Cluster Scalability](#cluster-scalability) - [PSS Support](#pss-support) - [Issuer Subjects and CRLs](#issuer-subjects-and-crls) + - [Issuer Storage Migration Issues](#issuer-storage-migration-issues) ## Be Careful with Root CAs @@ -616,6 +617,33 @@ correctly throughout all parts of the PKI. In particular, CRLs embed a track revocation, but note that performance characteristics are different between OCSP and CRLs. +## Issuer Storage Migration Issues + +When Vault migrates to the new multi-issuer storage layout on releases prior +to 1.11.6, 1.12.2, and 1.13, and storage write errors occur during the mount +initialization and storage migration process, the default issuer _may_ not +have the correct `ca_chain` value and may only have the self-reference. These +write errors most commonly manifest in logs as a message like +`failed to persist issuer ... chain to disk: ` and indicate that Vault +was not stable at the time of migration. Note that this only occurs when more +than one issuer exists within the mount (such as an intermediate with root). + +To fix this manually (until a new version of Vault automatically rebuilds the +issuer chain), a rebuild of the chains can be performed: + +``` +curl -X PATCH -H "Content-Type: application/merge-patch+json" -H "X-Vault-Request: true" -H "X-Vault-Token: $(vault print token)" -d '{"manual_chain":"self"}' https://.../issuer/default +curl -X PATCH -H "Content-Type: application/merge-patch+json" -H "X-Vault-Request: true" -H "X-Vault-Token: $(vault print token)" -d '{"manual_chain":""}' https://.../issuer/default +``` + +This temporarily sets the manual chain on the default issuer to a self-chain +only, before reverting it back to automatic chain building. This triggers a +refresh of the `ca_chain` field on the issuer, and can be verified with: + +``` +vault read pki/issuer/default +``` + ## Tutorial Refer to the [Build Your Own Certificate Authority (CA)](https://learn.hashicorp.com/vault/secrets-management/sm-pki-engine)