diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go index 4e0744b19..87e8b0c5e 100644 --- a/builtin/logical/pki/backend.go +++ b/builtin/logical/pki/backend.go @@ -220,9 +220,10 @@ const ( type tidyStatus struct { // Parameters used to initiate the operation - safetyBuffer int - tidyCertStore bool - tidyRevokedCerts bool + safetyBuffer int + tidyCertStore bool + tidyRevokedCerts bool + tidyRevokedAssocs bool // Status state tidyStatusState @@ -232,6 +233,7 @@ type tidyStatus struct { message string certStoreDeletedCount uint revokedCertDeletedCount uint + missingIssuerCertCount uint } const backendHelp = ` diff --git a/builtin/logical/pki/backend_test.go b/builtin/logical/pki/backend_test.go index 95ed3574f..92aa1758e 100644 --- a/builtin/logical/pki/backend_test.go +++ b/builtin/logical/pki/backend_test.go @@ -3873,16 +3873,18 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) { t.Fatal(err) } expectedData := map[string]interface{}{ - "safety_buffer": json.Number("1"), - "tidy_cert_store": true, - "tidy_revoked_certs": true, - "state": "Finished", - "error": nil, - "time_started": nil, - "time_finished": nil, - "message": nil, - "cert_store_deleted_count": json.Number("1"), - "revoked_cert_deleted_count": json.Number("1"), + "safety_buffer": json.Number("1"), + "tidy_cert_store": true, + "tidy_revoked_certs": true, + "tidy_revoked_cert_issuer_associations": false, + "state": "Finished", + "error": nil, + "time_started": nil, + "time_finished": nil, + "message": nil, + "cert_store_deleted_count": json.Number("1"), + "revoked_cert_deleted_count": json.Number("1"), + "missing_issuer_cert_count": json.Number("0"), } // Let's copy the times from the response so that we can use deep.Equal() timeStarted, ok := tidyStatus.Data["time_started"] diff --git a/builtin/logical/pki/crl_test.go b/builtin/logical/pki/crl_test.go index cde03af10..03effc8c3 100644 --- a/builtin/logical/pki/crl_test.go +++ b/builtin/logical/pki/crl_test.go @@ -1052,6 +1052,145 @@ func TestAutoRebuild(t *testing.T) { } } +func TestTidyIssuerAssociation(t *testing.T) { + t.Parallel() + + b, s := createBackendWithStorage(t) + + // Create a root CA. + resp, err := CBWrite(b, s, "root/generate/internal", map[string]interface{}{ + "common_name": "root example.com", + "issuer_name": "root", + "key_type": "ec", + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotEmpty(t, resp.Data["certificate"]) + require.NotEmpty(t, resp.Data["issuer_id"]) + rootCert := resp.Data["certificate"].(string) + rootID := resp.Data["issuer_id"].(issuerID) + + // Create a role for issuance. + _, err = CBWrite(b, s, "roles/local-testing", map[string]interface{}{ + "allow_any_name": true, + "enforce_hostnames": false, + "key_type": "ec", + "ttl": "75m", + }) + require.NoError(t, err) + + // Issue a leaf cert and ensure we can revoke it. + resp, err = CBWrite(b, s, "issue/local-testing", map[string]interface{}{ + "common_name": "testing", + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotEmpty(t, resp.Data["serial_number"]) + leafSerial := resp.Data["serial_number"].(string) + + _, err = CBWrite(b, s, "revoke", map[string]interface{}{ + "serial_number": leafSerial, + }) + require.NoError(t, err) + + // This leaf's revInfo entry should have an issuer associated + // with it. + entry, err := s.Get(ctx, revokedPath+normalizeSerial(leafSerial)) + require.NoError(t, err) + require.NotNil(t, entry) + require.NotNil(t, entry.Value) + + var leafInfo revocationInfo + err = entry.DecodeJSON(&leafInfo) + require.NoError(t, err) + require.Equal(t, rootID, leafInfo.CertificateIssuer) + + // Now remove the root and run tidy. + _, err = CBDelete(b, s, "issuer/default") + require.NoError(t, err) + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_revoked_cert_issuer_associations": true, + }) + require.NoError(t, err) + + // Wait for tidy to finish. + for { + time.Sleep(125 * time.Millisecond) + + resp, err = CBRead(b, s, "tidy-status") + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotEmpty(t, resp.Data["state"]) + state := resp.Data["state"].(string) + + if state == "Finished" { + break + } + if state == "Error" { + t.Fatalf("unexpected state for tidy operation: Error:\nStatus: %v", resp.Data) + } + } + + // Ensure we don't have an association on this leaf any more. + entry, err = s.Get(ctx, revokedPath+normalizeSerial(leafSerial)) + require.NoError(t, err) + require.NotNil(t, entry) + require.NotNil(t, entry.Value) + + err = entry.DecodeJSON(&leafInfo) + require.NoError(t, err) + require.Empty(t, leafInfo.CertificateIssuer) + + // Now, re-import the root and try again. + resp, err = CBWrite(b, s, "issuers/import/bundle", map[string]interface{}{ + "pem_bundle": rootCert, + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotNil(t, resp.Data["imported_issuers"]) + importedIssuers := resp.Data["imported_issuers"].([]string) + require.Equal(t, 1, len(importedIssuers)) + newRootID := importedIssuers[0] + require.NotEmpty(t, newRootID) + + // Re-run tidy... + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_revoked_cert_issuer_associations": true, + }) + require.NoError(t, err) + + // Wait for tidy to finish. + for { + time.Sleep(125 * time.Millisecond) + + resp, err = CBRead(b, s, "tidy-status") + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotEmpty(t, resp.Data["state"]) + state := resp.Data["state"].(string) + + if state == "Finished" { + break + } + if state == "Error" { + t.Fatalf("unexpected state for tidy operation: Error:\nStatus: %v", resp.Data) + } + } + + // Finally, double-check we associated things correctly. + entry, err = s.Get(ctx, revokedPath+normalizeSerial(leafSerial)) + require.NoError(t, err) + require.NotNil(t, entry) + require.NotNil(t, entry.Value) + + err = entry.DecodeJSON(&leafInfo) + require.NoError(t, err) + require.Equal(t, newRootID, string(leafInfo.CertificateIssuer)) +} + func requestCrlFromBackend(t *testing.T, s logical.Storage, b *backend) *logical.Response { crlReq := &logical.Request{ Operation: logical.ReadOperation, diff --git a/builtin/logical/pki/crl_util.go b/builtin/logical/pki/crl_util.go index 2b6f61aec..d470252e0 100644 --- a/builtin/logical/pki/crl_util.go +++ b/builtin/logical/pki/crl_util.go @@ -222,6 +222,50 @@ func (cb *crlBuilder) _doRebuild(ctx context.Context, b *backend, request *logic return nil } +// Helper function to fetch a map of issuerID->parsed cert for revocation +// usage. Unlike other paths, this needs to handle the legacy bundle +// more gracefully than rejecting it outright. +func fetchIssuerMapForRevocationChecking(sc *storageContext) (map[issuerID]*x509.Certificate, error) { + var err error + var issuers []issuerID + + if !sc.Backend.useLegacyBundleCaStorage() { + issuers, err = sc.listIssuers() + if err != nil { + return nil, fmt.Errorf("could not fetch issuers list: %v", err) + } + } else { + // Hack: this isn't a real issuerID, but it works for fetchCAInfo + // since it resolves the reference. + issuers = []issuerID{legacyBundleShimID} + } + + issuerIDCertMap := make(map[issuerID]*x509.Certificate, len(issuers)) + for _, issuer := range issuers { + _, bundle, caErr := sc.fetchCertBundleByIssuerId(issuer, false) + if caErr != nil { + return nil, fmt.Errorf("error fetching CA certificate for issuer id %v: %s", issuer, caErr) + } + + if bundle == nil { + return nil, fmt.Errorf("faulty reference: %v - CA info not found", issuer) + } + + parsedBundle, err := parseCABundle(sc.Context, sc.Backend, bundle) + if err != nil { + return nil, errutil.InternalError{Err: err.Error()} + } + + if parsedBundle.Certificate == nil { + return nil, errutil.InternalError{Err: "stored CA information not able to be parsed"} + } + + issuerIDCertMap[issuer] = parsedBundle.Certificate + } + + return issuerIDCertMap, nil +} + // Revokes a cert, and tries to be smart about error recovery func revokeCert(ctx context.Context, b *backend, req *logical.Request, serial string, fromLease bool) (*logical.Response, error) { // As this backend is self-contained and this function does not hook into @@ -237,53 +281,20 @@ func revokeCert(ctx context.Context, b *backend, req *logical.Request, serial st // to gracefully degrade to the legacy cert bundle when it is required, as // secondary PR clusters might not have been upgraded, but still need to // handle revoking certs. - var err error - var issuers []issuerID - sc := b.makeStorageContext(ctx, req.Storage) - if !b.useLegacyBundleCaStorage() { - issuers, err = sc.listIssuers() - if err != nil { - return logical.ErrorResponse(fmt.Sprintf("could not fetch issuers list: %v", err)), nil - } - } else { - // Hack: this isn't a real issuerID, but it works for fetchCAInfo - // since it resolves the reference. - issuers = []issuerID{legacyBundleShimID} + issuerIDCertMap, err := fetchIssuerMapForRevocationChecking(sc) + if err != nil { + return nil, err } - issuerIDCertMap := make(map[issuerID]*x509.Certificate, len(issuers)) - for _, issuer := range issuers { - _, bundle, caErr := sc.fetchCertBundleByIssuerId(issuer, false) - if caErr != nil { - switch caErr.(type) { - case errutil.UserError: - return logical.ErrorResponse(fmt.Sprintf("could not fetch the CA certificate for issuer id %v: %s", issuer, caErr)), nil - default: - return nil, fmt.Errorf("error fetching CA certificate for issuer id %v: %s", issuer, caErr) - } - } - - if bundle == nil { - return nil, fmt.Errorf("faulty reference: %v - CA info not found", issuer) - } - - parsedBundle, err := parseCABundle(ctx, b, bundle) - if err != nil { - return nil, errutil.InternalError{Err: err.Error()} - } - - if parsedBundle.Certificate == nil { - return nil, errutil.InternalError{Err: "stored CA information not able to be parsed"} - } - + // Ensure we don't revoke an issuer via this API; use /issuer/:issuer_ref/revoke + // instead. + for issuer, certificate := range issuerIDCertMap { colonSerial := strings.ReplaceAll(strings.ToLower(serial), "-", ":") - if colonSerial == serialFromCert(parsedBundle.Certificate) { + if colonSerial == serialFromCert(certificate) { return logical.ErrorResponse(fmt.Sprintf("adding issuer (id: %v) to its own CRL is not allowed", issuer)), nil } - - issuerIDCertMap[issuer] = parsedBundle.Certificate } alreadyRevoked := false @@ -683,6 +694,17 @@ func buildCRLs(ctx context.Context, b *backend, req *logical.Request, forceNew b return nil } +func isRevInfoIssuerValid(revInfo *revocationInfo, issuerIDCertMap map[issuerID]*x509.Certificate) bool { + if len(revInfo.CertificateIssuer) > 0 { + issuerId := revInfo.CertificateIssuer + if _, issuerExists := issuerIDCertMap[issuerId]; issuerExists { + return true + } + } + + return false +} + func associateRevokedCertWithIsssuer(revInfo *revocationInfo, revokedCert *x509.Certificate, issuerIDCertMap map[issuerID]*x509.Certificate) bool { for issuerId, issuerCert := range issuerIDCertMap { if bytes.Equal(revokedCert.RawIssuer, issuerCert.RawSubject) { @@ -782,12 +804,9 @@ func getRevokedCertEntries(ctx context.Context, req *logical.Request, issuerIDCe // prefer it to manually checking each issuer signature, assuming it // appears valid. It's highly unlikely for two different issuers // to have the same id (after the first was deleted). - if len(revInfo.CertificateIssuer) > 0 { - issuerId := revInfo.CertificateIssuer - if _, issuerExists := issuerIDCertMap[issuerId]; issuerExists { - revokedCertsMap[issuerId] = append(revokedCertsMap[issuerId], newRevCert) - continue - } + if isRevInfoIssuerValid(&revInfo, issuerIDCertMap) { + revokedCertsMap[revInfo.CertificateIssuer] = append(revokedCertsMap[revInfo.CertificateIssuer], newRevCert) + continue // Otherwise, fall through and update the entry. } diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go index 2972bfa05..07027f090 100644 --- a/builtin/logical/pki/path_tidy.go +++ b/builtin/logical/pki/path_tidy.go @@ -9,11 +9,20 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/vault/sdk/framework" "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/logical" ) +type tidyConfig struct { + CertStore bool `json:"tidy_cert_store"` + RevokedCerts bool `json:"tidy_revoked_certs"` + IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"` + SafetyBuffer time.Duration `json:"safety_buffer"` +} + func pathTidy(b *backend) *framework.Path { return &framework.Path{ Pattern: "tidy$", @@ -36,6 +45,13 @@ and expired certificates, removing them both from the CRL and from storage. The CRL will be rotated if this causes any values to be removed.`, }, + "tidy_revoked_cert_issuer_associations": { + Type: framework.TypeBool, + Description: `Set to true to validate issuer associations +on revocation entries. This helps increase the performance of CRL building +and OCSP responses.`, + }, + "safety_buffer": { Type: framework.TypeDurationSecond, Description: `The amount of extra time that must have passed @@ -75,8 +91,8 @@ func pathTidyStatus(b *backend) *framework.Path { func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { safetyBuffer := d.Get("safety_buffer").(int) tidyCertStore := d.Get("tidy_cert_store").(bool) - tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool) - tidyRevocationList := d.Get("tidy_revocation_list").(bool) + tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool) || d.Get("tidy_revocation_list").(bool) + tidyRevokedAssocs := d.Get("tidy_revoked_cert_issuer_associations").(bool) if safetyBuffer < 1 { return logical.ErrorResponse("safety_buffer must be greater than zero"), nil @@ -84,6 +100,13 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr bufferDuration := time.Duration(safetyBuffer) * time.Second + config := &tidyConfig{ + CertStore: tidyCertStore, + RevokedCerts: tidyRevokedCerts, + IssuerAssocs: tidyRevokedAssocs, + SafetyBuffer: bufferDuration, + } + if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) { resp := &logical.Response{} resp.AddWarning("Tidy operation already in progress.") @@ -96,151 +119,39 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr Storage: req.Storage, } + b.startTidyOperation(req, config) + + resp := &logical.Response{} + if !tidyCertStore && !tidyRevokedCerts && !tidyRevokedAssocs { + resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true or tidy_revoked_cert_issuer_associations=true to start a tidy operation.") + } else { + resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.") + } + + return logical.RespondWithStatusCode(resp, req, http.StatusAccepted) +} + +func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) { go func() { defer atomic.StoreUint32(b.tidyCASGuard, 0) - b.tidyStatusStart(safetyBuffer, tidyCertStore, tidyRevokedCerts || tidyRevocationList) + b.tidyStatusStart(config) - // Don't cancel when the original client request goes away - ctx = context.Background() + // Don't cancel when the original client request goes away. + ctx := context.Background() logger := b.Logger().Named("tidy") doTidy := func() error { - if tidyCertStore { - serials, err := req.Storage.List(ctx, "certs/") - if err != nil { - return fmt.Errorf("error fetching list of certs: %w", err) + if config.CertStore { + if err := b.doTidyCertStore(ctx, req, logger, config); err != nil { + return err } - - serialCount := len(serials) - metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries"}, float32(serialCount)) - for i, serial := range serials { - b.tidyStatusMessage(fmt.Sprintf("Tidying certificate store: checking entry %d of %d", i, serialCount)) - metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_current_entry"}, float32(i)) - - certEntry, err := req.Storage.Get(ctx, "certs/"+serial) - if err != nil { - return fmt.Errorf("error fetching certificate %q: %w", serial, err) - } - - if certEntry == nil { - logger.Warn("certificate entry is nil; tidying up since it is no longer useful for any server operations", "serial", serial) - if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { - return fmt.Errorf("error deleting nil entry with serial %s: %w", serial, err) - } - b.tidyStatusIncCertStoreCount() - continue - } - - if certEntry.Value == nil || len(certEntry.Value) == 0 { - logger.Warn("certificate entry has no value; tidying up since it is no longer useful for any server operations", "serial", serial) - if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { - return fmt.Errorf("error deleting entry with nil value with serial %s: %w", serial, err) - } - b.tidyStatusIncCertStoreCount() - continue - } - - cert, err := x509.ParseCertificate(certEntry.Value) - if err != nil { - return fmt.Errorf("unable to parse stored certificate with serial %q: %w", serial, err) - } - - if time.Now().After(cert.NotAfter.Add(bufferDuration)) { - if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { - return fmt.Errorf("error deleting serial %q from storage: %w", serial, err) - } - b.tidyStatusIncCertStoreCount() - } - } - metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries_remaining"}, float32(uint(serialCount)-b.tidyStatus.certStoreDeletedCount)) } - if tidyRevokedCerts || tidyRevocationList { - b.revokeStorageLock.Lock() - defer b.revokeStorageLock.Unlock() - - rebuildCRL := false - - revokedSerials, err := req.Storage.List(ctx, "revoked/") - if err != nil { - return fmt.Errorf("error fetching list of revoked certs: %w", err) - } - - revokedSerialsCount := len(revokedSerials) - metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries"}, float32(revokedSerialsCount)) - - var revInfo revocationInfo - for i, serial := range revokedSerials { - b.tidyStatusMessage(fmt.Sprintf("Tidying revoked certificates: checking certificate %d of %d", i, len(revokedSerials))) - metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_current_entry"}, float32(i)) - - revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial) - if err != nil { - return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err) - } - - if revokedEntry == nil { - logger.Warn("revoked entry is nil; tidying up since it is no longer useful for any server operations", "serial", serial) - if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { - return fmt.Errorf("error deleting nil revoked entry with serial %s: %w", serial, err) - } - b.tidyStatusIncRevokedCertCount() - continue - } - - if revokedEntry.Value == nil || len(revokedEntry.Value) == 0 { - logger.Warn("revoked entry has nil value; tidying up since it is no longer useful for any server operations", "serial", serial) - if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { - return fmt.Errorf("error deleting revoked entry with nil value with serial %s: %w", serial, err) - } - b.tidyStatusIncRevokedCertCount() - continue - } - - err = revokedEntry.DecodeJSON(&revInfo) - if err != nil { - return fmt.Errorf("error decoding revocation entry for serial %q: %w", serial, err) - } - - revokedCert, err := x509.ParseCertificate(revInfo.CertificateBytes) - if err != nil { - return fmt.Errorf("unable to parse stored revoked certificate with serial %q: %w", serial, err) - } - - // Only remove the entries from revoked/ and certs/ if we're - // past its NotAfter value. This is because we use the - // information on revoked/ to build the CRL and the - // information on certs/ for lookup. - if time.Now().After(revokedCert.NotAfter.Add(bufferDuration)) { - if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { - return fmt.Errorf("error deleting serial %q from revoked list: %w", serial, err) - } - if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { - return fmt.Errorf("error deleting serial %q from store when tidying revoked: %w", serial, err) - } - rebuildCRL = true - b.tidyStatusIncRevokedCertCount() - } - } - metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries_remaining"}, float32(uint(revokedSerialsCount)-b.tidyStatus.revokedCertDeletedCount)) - if rebuildCRL { - // Expired certificates isn't generally an important - // reason to trigger a CRL rebuild for. Check if - // automatic CRL rebuilds have been enabled and defer - // the rebuild if so. - sc := b.makeStorageContext(ctx, req.Storage) - config, err := sc.getRevocationConfig() - if err != nil { - return err - } - - if !config.AutoRebuild { - if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil { - return err - } - } + if config.RevokedCerts || config.IssuerAssocs { + if err := b.doTidyRevocationStore(ctx, req, logger, config); err != nil { + return nil } } @@ -254,15 +165,192 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr b.tidyStatusStop(nil) } }() +} - resp := &logical.Response{} - if !tidyCertStore && !tidyRevokedCerts && !tidyRevocationList { - resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true to start a tidy operation.") - } else { - resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.") +func (b *backend) doTidyCertStore(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error { + serials, err := req.Storage.List(ctx, "certs/") + if err != nil { + return fmt.Errorf("error fetching list of certs: %w", err) } - return logical.RespondWithStatusCode(resp, req, http.StatusAccepted) + serialCount := len(serials) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries"}, float32(serialCount)) + for i, serial := range serials { + b.tidyStatusMessage(fmt.Sprintf("Tidying certificate store: checking entry %d of %d", i, serialCount)) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_current_entry"}, float32(i)) + + certEntry, err := req.Storage.Get(ctx, "certs/"+serial) + if err != nil { + return fmt.Errorf("error fetching certificate %q: %w", serial, err) + } + + if certEntry == nil { + logger.Warn("certificate entry is nil; tidying up since it is no longer useful for any server operations", "serial", serial) + if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { + return fmt.Errorf("error deleting nil entry with serial %s: %w", serial, err) + } + b.tidyStatusIncCertStoreCount() + continue + } + + if certEntry.Value == nil || len(certEntry.Value) == 0 { + logger.Warn("certificate entry has no value; tidying up since it is no longer useful for any server operations", "serial", serial) + if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { + return fmt.Errorf("error deleting entry with nil value with serial %s: %w", serial, err) + } + b.tidyStatusIncCertStoreCount() + continue + } + + cert, err := x509.ParseCertificate(certEntry.Value) + if err != nil { + return fmt.Errorf("unable to parse stored certificate with serial %q: %w", serial, err) + } + + if time.Now().After(cert.NotAfter.Add(config.SafetyBuffer)) { + if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { + return fmt.Errorf("error deleting serial %q from storage: %w", serial, err) + } + b.tidyStatusIncCertStoreCount() + } + } + + metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries_remaining"}, float32(uint(serialCount)-b.tidyStatus.certStoreDeletedCount)) + + return nil +} + +func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error { + b.revokeStorageLock.Lock() + defer b.revokeStorageLock.Unlock() + + // Fetch and parse our issuers so we can associate them if necessary. + sc := b.makeStorageContext(ctx, req.Storage) + issuerIDCertMap, err := fetchIssuerMapForRevocationChecking(sc) + if err != nil { + return err + } + + rebuildCRL := false + + revokedSerials, err := req.Storage.List(ctx, "revoked/") + if err != nil { + return fmt.Errorf("error fetching list of revoked certs: %w", err) + } + + revokedSerialsCount := len(revokedSerials) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries"}, float32(revokedSerialsCount)) + + fixedIssuers := 0 + + var revInfo revocationInfo + for i, serial := range revokedSerials { + b.tidyStatusMessage(fmt.Sprintf("Tidying revoked certificates: checking certificate %d of %d", i, len(revokedSerials))) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_current_entry"}, float32(i)) + + revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial) + if err != nil { + return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err) + } + + if revokedEntry == nil { + logger.Warn("revoked entry is nil; tidying up since it is no longer useful for any server operations", "serial", serial) + if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { + return fmt.Errorf("error deleting nil revoked entry with serial %s: %w", serial, err) + } + b.tidyStatusIncRevokedCertCount() + continue + } + + if revokedEntry.Value == nil || len(revokedEntry.Value) == 0 { + logger.Warn("revoked entry has nil value; tidying up since it is no longer useful for any server operations", "serial", serial) + if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { + return fmt.Errorf("error deleting revoked entry with nil value with serial %s: %w", serial, err) + } + b.tidyStatusIncRevokedCertCount() + continue + } + + err = revokedEntry.DecodeJSON(&revInfo) + if err != nil { + return fmt.Errorf("error decoding revocation entry for serial %q: %w", serial, err) + } + + revokedCert, err := x509.ParseCertificate(revInfo.CertificateBytes) + if err != nil { + return fmt.Errorf("unable to parse stored revoked certificate with serial %q: %w", serial, err) + } + + // Tidy operations over revoked certs should execute prior to + // tidyRevokedCerts as that may remove the entry. If that happens, + // we won't persist the revInfo changes (as it was deleted instead). + var storeCert bool + if config.IssuerAssocs { + if !isRevInfoIssuerValid(&revInfo, issuerIDCertMap) { + b.tidyStatusIncMissingIssuerCertCount() + revInfo.CertificateIssuer = issuerID("") + storeCert = true + if associateRevokedCertWithIsssuer(&revInfo, revokedCert, issuerIDCertMap) { + fixedIssuers += 1 + } + } + } + + if config.RevokedCerts { + // Only remove the entries from revoked/ and certs/ if we're + // past its NotAfter value. This is because we use the + // information on revoked/ to build the CRL and the + // information on certs/ for lookup. + if time.Now().After(revokedCert.NotAfter.Add(config.SafetyBuffer)) { + if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { + return fmt.Errorf("error deleting serial %q from revoked list: %w", serial, err) + } + if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { + return fmt.Errorf("error deleting serial %q from store when tidying revoked: %w", serial, err) + } + rebuildCRL = true + storeCert = false + b.tidyStatusIncRevokedCertCount() + } + } + + // If the entry wasn't removed but was otherwise modified, + // go ahead and write it back out. + if storeCert { + revokedEntry, err = logical.StorageEntryJSON("revoked/"+serial, revInfo) + if err != nil { + return fmt.Errorf("error building entry to persist changes to serial %v from revoked list: %v", serial, err) + } + + err = req.Storage.Put(ctx, revokedEntry) + if err != nil { + return fmt.Errorf("error persisting changes to serial %v from revoked list: %v", serial, err) + } + } + } + + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries_remaining"}, float32(uint(revokedSerialsCount)-b.tidyStatus.revokedCertDeletedCount)) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_entries_incorrect_issuers"}, float32(b.tidyStatus.missingIssuerCertCount)) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_entries_fixed_issuers"}, float32(fixedIssuers)) + + if rebuildCRL { + // Expired certificates isn't generally an important + // reason to trigger a CRL rebuild for. Check if + // automatic CRL rebuilds have been enabled and defer + // the rebuild if so. + config, err := sc.getRevocationConfig() + if err != nil { + return err + } + + if !config.AutoRebuild { + if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil { + return err + } + } + } + + return nil } func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *framework.FieldData) (*logical.Response, error) { @@ -287,6 +375,7 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f "message": nil, "cert_store_deleted_count": nil, "revoked_cert_deleted_count": nil, + "missing_issuer_cert_count": nil, }, } @@ -297,10 +386,12 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f resp.Data["safety_buffer"] = b.tidyStatus.safetyBuffer resp.Data["tidy_cert_store"] = b.tidyStatus.tidyCertStore resp.Data["tidy_revoked_certs"] = b.tidyStatus.tidyRevokedCerts + resp.Data["tidy_revoked_cert_issuer_associations"] = b.tidyStatus.tidyRevokedAssocs resp.Data["time_started"] = b.tidyStatus.timeStarted resp.Data["message"] = b.tidyStatus.message resp.Data["cert_store_deleted_count"] = b.tidyStatus.certStoreDeletedCount resp.Data["revoked_cert_deleted_count"] = b.tidyStatus.revokedCertDeletedCount + resp.Data["missing_issuer_cert_count"] = b.tidyStatus.missingIssuerCertCount switch b.tidyStatus.state { case tidyStatusStarted: @@ -320,16 +411,17 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f return resp, nil } -func (b *backend) tidyStatusStart(safetyBuffer int, tidyCertStore, tidyRevokedCerts bool) { +func (b *backend) tidyStatusStart(config *tidyConfig) { b.tidyStatusLock.Lock() defer b.tidyStatusLock.Unlock() b.tidyStatus = &tidyStatus{ - safetyBuffer: safetyBuffer, - tidyCertStore: tidyCertStore, - tidyRevokedCerts: tidyRevokedCerts, - state: tidyStatusStarted, - timeStarted: time.Now(), + safetyBuffer: int(config.SafetyBuffer / time.Second), + tidyCertStore: config.CertStore, + tidyRevokedCerts: config.RevokedCerts, + tidyRevokedAssocs: config.IssuerAssocs, + state: tidyStatusStarted, + timeStarted: time.Now(), } metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, float32(b.tidyStatus.timeStarted.Unix())) @@ -380,6 +472,13 @@ func (b *backend) tidyStatusIncRevokedCertCount() { b.tidyStatus.revokedCertDeletedCount++ } +func (b *backend) tidyStatusIncMissingIssuerCertCount() { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus.missingIssuerCertCount++ +} + const pathTidyHelpSyn = ` Tidy up the backend by removing expired certificates, revocation information, or both. @@ -419,6 +518,7 @@ The result includes the following fields: * 'safety_buffer': the value of this parameter when initiating the tidy operation * 'tidy_cert_store': the value of this parameter when initiating the tidy operation * 'tidy_revoked_certs': the value of this parameter when initiating the tidy operation +* 'tidy_revoked_cert_issuer_associations': the value of this parameter when initiating the tidy operation * 'state': one of "Inactive", "Running", "Finished", "Error" * 'error': the error message, if the operation ran into an error * 'time_started': the time the operation started @@ -427,4 +527,5 @@ The result includes the following fields: "Tidying revoked certificates: checking certificate N of TOTAL" * 'cert_store_deleted_count': The number of certificate storage entries deleted * 'revoked_cert_deleted_count': The number of revoked certificate entries deleted +* 'missing_issuer_cert_count': The number of revoked certificates which were missing a valid issuer reference ` diff --git a/changelog/16871.txt b/changelog/16871.txt new file mode 100644 index 000000000..8b57c78e8 --- /dev/null +++ b/changelog/16871.txt @@ -0,0 +1,3 @@ +```release-note:improvement +secrets/pki: Allow tidy to associate revoked certs with their issuers for OCSP performance +``` diff --git a/website/content/api-docs/secret/pki.mdx b/website/content/api-docs/secret/pki.mdx index 3e53c6a7f..e49236441 100644 --- a/website/content/api-docs/secret/pki.mdx +++ b/website/content/api-docs/secret/pki.mdx @@ -3123,16 +3123,21 @@ expiration time. #### Parameters -- `tidy_cert_store` `(bool: false)` Specifies whether to tidy up the certificate +- `tidy_cert_store` `(bool: false)` - Specifies whether to tidy up the certificate store. -- `tidy_revoked_certs` `(bool: false)` Set to true to remove all invalid and +- `tidy_revoked_certs` `(bool: false)` - Set to true to remove all invalid and expired certificates from storage. A revoked storage entry is considered invalid if the entry is empty, or the value within the entry is empty. If a certificate is removed due to expiry, the entry will also be removed from the CRL, and the CRL will be rotated. -- `safety_buffer` `(string: "")` Specifies a duration using [duration format strings](/docs/concepts/duration-format) +- `tidy_revoked_cert_issuer_associations` `(bool: false)` - Set to true to associate + revoked certificates with their corresponding issuers; this improves the + performance of OCSP and CRL building, by shifting work to a tidy operation + instead. + +- `safety_buffer` `(string: "")` - Specifies a duration using [duration format strings](/docs/concepts/duration-format) used as a safety buffer to ensure certificates are not expunged prematurely; as an example, this can keep certificates from being removed from the CRL that, due to clock skew, might still be considered valid on other hosts. For a certificate to be expunged, diff --git a/website/content/docs/internals/telemetry.mdx b/website/content/docs/internals/telemetry.mdx index a830f3cf0..f20f224ae 100644 --- a/website/content/docs/internals/telemetry.mdx +++ b/website/content/docs/internals/telemetry.mdx @@ -321,8 +321,10 @@ These metrics relate to the supported [secrets engines][secrets-engines]. | `secrets.pki.tidy.revoked_cert_deleted_count` | Number of entries deleted from the certificate store for revoked certificates | entry | counter | | `secrets.pki.tidy.revoked_cert_total_entries` | Number of entries in the certificate store for revoked certificates to verify during the tidy operation | entry | gauge | | `secrets.pki.tidy.revoked_cert_total_entries_remaining` | Number of entries in the certificate store for revoked certificates that are left after the tidy operation (checked but not removed). | entry | gauge | +| `secrets.pki.tidy.revoked_cert_total_entries_incorrect_issuers` | Number of entries in the certificate store which had incorrect issuer information (total). | entry | gauge | +| `secrets.pki.tidy.revoked_cert_total_entries_fixed_issuers` | Number of entries in the certificate store which had incorrect issuer information that was fixed during this tidy operation. | entry | gauge | | `secrets.pki.tidy.start_time_epoch` | Start time (as seconds since Jan 1 1970) when the PKI tidy operation is active, 0 otherwise | seconds | gauge | -| `secrets.pki.tidy.success` | Number of times the PKI tidy operation has completed succcessfully | operations | counter | +| `secrets.pki.tidy.success` | Number of times the PKI tidy operation has completed successfully. | operations | counter | | `vault.secret.kv.count` (cluster, namespace, mount_point) | Number of entries in each key-value secret engine. | paths | gauge | | `vault.secret.lease.creation` (cluster, namespace, secret_engine, mount_point, creation_ttl) | Counts the number of leases created by secret engines. | leases | counter |