diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go index ba3f093fd..c92dfa949 100644 --- a/builtin/logical/pki/backend.go +++ b/builtin/logical/pki/backend.go @@ -259,11 +259,13 @@ const ( type tidyStatus struct { // Parameters used to initiate the operation - safetyBuffer int - tidyCertStore bool - tidyRevokedCerts bool - tidyRevokedAssocs bool - pauseDuration string + safetyBuffer int + issuerSafetyBuffer int + tidyCertStore bool + tidyRevokedCerts bool + tidyRevokedAssocs bool + tidyExpiredIssuers bool + pauseDuration string // Status state tidyStatusState diff --git a/builtin/logical/pki/backend_test.go b/builtin/logical/pki/backend_test.go index 282d670a7..9b41a9cf7 100644 --- a/builtin/logical/pki/backend_test.go +++ b/builtin/logical/pki/backend_test.go @@ -3936,9 +3936,11 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) { } expectedData := map[string]interface{}{ "safety_buffer": json.Number("1"), + "issuer_safety_buffer": json.Number("31536000"), "tidy_cert_store": true, "tidy_revoked_certs": true, "tidy_revoked_cert_issuer_associations": false, + "tidy_expired_issuers": false, "pause_duration": "0s", "state": "Finished", "error": nil, diff --git a/builtin/logical/pki/fields.go b/builtin/logical/pki/fields.go index 497ea8893..78cdd67e8 100644 --- a/builtin/logical/pki/fields.go +++ b/builtin/logical/pki/fields.go @@ -454,6 +454,13 @@ on revocation entries. This helps increase the performance of CRL building and OCSP responses.`, } + fields["tidy_expired_issuers"] = &framework.FieldSchema{ + Type: framework.TypeBool, + Description: `Set to true to automatically remove expired issuers +past the issuer_safety_buffer. No keys will be removed as part of this +operation.`, + } + fields["safety_buffer"] = &framework.FieldSchema{ Type: framework.TypeDurationSecond, Description: `The amount of extra time that must have passed @@ -463,6 +470,15 @@ Defaults to 72 hours.`, Default: int(defaultTidyConfig.SafetyBuffer / time.Second), // TypeDurationSecond currently requires defaults to be int } + fields["issuer_safety_buffer"] = &framework.FieldSchema{ + Type: framework.TypeDurationSecond, + Description: `The amount of extra time that must have passed +beyond issuer's expiration before it is removed +from the backend storage. +Defaults to 8760 hours (1 year).`, + Default: int(defaultTidyConfig.IssuerSafetyBuffer / time.Second), // TypeDurationSecond currently requires defaults to be int + } + fields["pause_duration"] = &framework.FieldSchema{ Type: framework.TypeString, Description: `The amount of time to wait between processing diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go index 82970d3aa..ae5f160ec 100644 --- a/builtin/logical/pki/path_tidy.go +++ b/builtin/logical/pki/path_tidy.go @@ -13,29 +13,34 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/sdk/framework" + "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/logical" ) var tidyCancelledError = errors.New("tidy operation cancelled") type tidyConfig struct { - Enabled bool `json:"enabled"` - Interval time.Duration `json:"interval_duration"` - CertStore bool `json:"tidy_cert_store"` - RevokedCerts bool `json:"tidy_revoked_certs"` - IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"` - SafetyBuffer time.Duration `json:"safety_buffer"` - PauseDuration time.Duration `json:"pause_duration"` + Enabled bool `json:"enabled"` + Interval time.Duration `json:"interval_duration"` + CertStore bool `json:"tidy_cert_store"` + RevokedCerts bool `json:"tidy_revoked_certs"` + IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"` + ExpiredIssuers bool `json:"tidy_expired_issuers"` + SafetyBuffer time.Duration `json:"safety_buffer"` + IssuerSafetyBuffer time.Duration `json:"issuer_safety_buffer"` + PauseDuration time.Duration `json:"pause_duration"` } var defaultTidyConfig = tidyConfig{ - Enabled: false, - Interval: 12 * time.Hour, - CertStore: false, - RevokedCerts: false, - IssuerAssocs: false, - SafetyBuffer: 72 * time.Hour, - PauseDuration: 0 * time.Second, + Enabled: false, + Interval: 12 * time.Hour, + CertStore: false, + RevokedCerts: false, + IssuerAssocs: false, + ExpiredIssuers: false, + SafetyBuffer: 72 * time.Hour, + IssuerSafetyBuffer: 365 * 24 * time.Hour, + PauseDuration: 0 * time.Second, } func pathTidy(b *backend) *framework.Path { @@ -116,6 +121,8 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr tidyCertStore := d.Get("tidy_cert_store").(bool) tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool) || d.Get("tidy_revocation_list").(bool) tidyRevokedAssocs := d.Get("tidy_revoked_cert_issuer_associations").(bool) + tidyExpiredIssuers := d.Get("tidy_expired_issuers").(bool) + issuerSafetyBuffer := d.Get("issuer_safety_buffer").(int) pauseDurationStr := d.Get("pause_duration").(string) pauseDuration := 0 * time.Second @@ -123,6 +130,10 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr return logical.ErrorResponse("safety_buffer must be greater than zero"), nil } + if issuerSafetyBuffer < 1 { + return logical.ErrorResponse("issuer_safety_buffer must be greater than zero"), nil + } + if pauseDurationStr != "" { var err error pauseDuration, err = time.ParseDuration(pauseDurationStr) @@ -136,16 +147,19 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr } bufferDuration := time.Duration(safetyBuffer) * time.Second + issuerBufferDuration := time.Duration(issuerSafetyBuffer) * time.Second // Manual run with constructed configuration. config := &tidyConfig{ - Enabled: true, - Interval: 0 * time.Second, - CertStore: tidyCertStore, - RevokedCerts: tidyRevokedCerts, - IssuerAssocs: tidyRevokedAssocs, - SafetyBuffer: bufferDuration, - PauseDuration: pauseDuration, + Enabled: true, + Interval: 0 * time.Second, + CertStore: tidyCertStore, + RevokedCerts: tidyRevokedCerts, + IssuerAssocs: tidyRevokedAssocs, + ExpiredIssuers: tidyExpiredIssuers, + SafetyBuffer: bufferDuration, + IssuerSafetyBuffer: issuerBufferDuration, + PauseDuration: pauseDuration, } if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) { @@ -170,8 +184,8 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr b.startTidyOperation(req, config) resp := &logical.Response{} - if !tidyCertStore && !tidyRevokedCerts && !tidyRevokedAssocs { - resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true or tidy_revoked_cert_issuer_associations=true to start a tidy operation.") + if !tidyCertStore && !tidyRevokedCerts && !tidyRevokedAssocs && !tidyExpiredIssuers { + resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true or tidy_revoked_cert_issuer_associations=true or tidy_expired_issuers=true to start a tidy operation.") } else { resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.") } @@ -209,6 +223,12 @@ func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) { } } + if config.ExpiredIssuers { + if err := b.doTidyExpiredIssuers(ctx, req, logger, config); err != nil { + return err + } + } + return nil } @@ -440,6 +460,111 @@ func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Reques return nil } +func (b *backend) doTidyExpiredIssuers(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error { + if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) || + (!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) { + b.Logger().Debug("skipping expired issuer tidy as we're not on the primary or secondary with a local mount") + return nil + } + + // Short-circuit to avoid having to deal with the legacy mounts. While we + // could handle this case and remove these issuers, its somewhat + // unexpected behavior and we'd prefer to finish the migration first. + if b.useLegacyBundleCaStorage() { + return nil + } + + b.issuersLock.Lock() + defer b.issuersLock.Unlock() + + // Fetch and parse our issuers so we have their expiration date. + sc := b.makeStorageContext(ctx, req.Storage) + issuerIDCertMap, err := fetchIssuerMapForRevocationChecking(sc) + if err != nil { + return err + } + + // Fetch the issuer config to find the default; we don't want to remove + // the current active issuer automatically. + iConfig, err := sc.getIssuersConfig() + if err != nil { + return err + } + + // We want certificates which have expired before this date by a given + // safety buffer. So we subtract the buffer from now, and anything which + // has expired before our after buffer can be tidied, and anything that + // expired after this buffer must be kept. + now := time.Now() + afterBuffer := now.Add(-1 * config.IssuerSafetyBuffer) + + rebuildChainsAndCRL := false + + for issuer, cert := range issuerIDCertMap { + if cert.NotAfter.After(afterBuffer) { + continue + } + + entry, err := sc.fetchIssuerById(issuer) + if err != nil { + return nil + } + + // This issuer's certificate has expired. We explicitly persist the + // key, but log both the certificate and the keyId to the + // informational logs so an admin can recover the removed cert if + // necessary or remove the key (and know which cert it belonged to), + // if desired. + msg := "[Tidy on mount: %v] Issuer %v has expired by %v and is being removed." + idAndName := fmt.Sprintf("[id:%v/name:%v]", entry.ID, entry.Name) + msg = fmt.Sprintf(msg, b.backendUUID, idAndName, config.IssuerSafetyBuffer) + + // Before we log, check if we're the default. While this is late, and + // after we read it from storage, we have more info here to tell the + // user that their default has expired AND has passed the safety + // buffer. + if iConfig.DefaultIssuerId == issuer { + msg = "[Tidy on mount: %v] Issuer %v has expired and would be removed via tidy, but won't be, as it is currently the default issuer." + msg = fmt.Sprintf(msg, b.backendUUID, idAndName) + b.Logger().Warn(msg) + continue + } + + // Log the above message.. + b.Logger().Info(msg, "serial_number", entry.SerialNumber, "key_id", entry.KeyID, "certificate", entry.Certificate) + + wasDefault, err := sc.deleteIssuer(issuer) + if err != nil { + b.Logger().Error(fmt.Sprintf("failed to remove %v: %v", idAndName, err)) + return err + } + if wasDefault { + b.Logger().Warn(fmt.Sprintf("expired issuer %v was default; it is strongly encouraged to choose a new default issuer for backwards compatibility", idAndName)) + } + + rebuildChainsAndCRL = true + } + + if rebuildChainsAndCRL { + // When issuers are removed, there's a chance chains change as a + // result; remove them. + if err := sc.rebuildIssuersChains(nil); err != nil { + return err + } + + // Removal of issuers is generally a good reason to rebuild the CRL, + // even if auto-rebuild is enabled. + b.revokeStorageLock.Lock() + defer b.revokeStorageLock.Unlock() + + if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil { + return err + } + } + + return nil +} + func (b *backend) pathTidyCancelWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { if atomic.LoadUint32(b.tidyCASGuard) == 0 { resp := &logical.Response{} @@ -470,9 +595,11 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f resp := &logical.Response{ Data: map[string]interface{}{ "safety_buffer": nil, + "issuer_safety_buffer": nil, "tidy_cert_store": nil, "tidy_revoked_certs": nil, "tidy_revoked_cert_issuer_associations": nil, + "tidy_expired_issuers": nil, "pause_duration": nil, "state": "Inactive", "error": nil, @@ -492,9 +619,11 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f } resp.Data["safety_buffer"] = b.tidyStatus.safetyBuffer + resp.Data["issuer_safety_buffer"] = b.tidyStatus.issuerSafetyBuffer resp.Data["tidy_cert_store"] = b.tidyStatus.tidyCertStore resp.Data["tidy_revoked_certs"] = b.tidyStatus.tidyRevokedCerts resp.Data["tidy_revoked_cert_issuer_associations"] = b.tidyStatus.tidyRevokedAssocs + resp.Data["tidy_expired_issuers"] = b.tidyStatus.tidyExpiredIssuers resp.Data["pause_duration"] = b.tidyStatus.pauseDuration resp.Data["time_started"] = b.tidyStatus.timeStarted resp.Data["message"] = b.tidyStatus.message @@ -547,7 +676,9 @@ func (b *backend) pathConfigAutoTidyRead(ctx context.Context, req *logical.Reque "tidy_cert_store": config.CertStore, "tidy_revoked_certs": config.RevokedCerts, "tidy_revoked_cert_issuer_associations": config.IssuerAssocs, + "tidy_expired_issuers": config.ExpiredIssuers, "safety_buffer": int(config.SafetyBuffer / time.Second), + "issuer_safety_buffer": int(config.IssuerSafetyBuffer / time.Second), "pause_duration": config.PauseDuration.String(), }, }, nil @@ -613,11 +744,13 @@ func (b *backend) tidyStatusStart(config *tidyConfig) { defer b.tidyStatusLock.Unlock() b.tidyStatus = &tidyStatus{ - safetyBuffer: int(config.SafetyBuffer / time.Second), - tidyCertStore: config.CertStore, - tidyRevokedCerts: config.RevokedCerts, - tidyRevokedAssocs: config.IssuerAssocs, - pauseDuration: config.PauseDuration.String(), + safetyBuffer: int(config.SafetyBuffer / time.Second), + issuerSafetyBuffer: int(config.IssuerSafetyBuffer / time.Second), + tidyCertStore: config.CertStore, + tidyRevokedCerts: config.RevokedCerts, + tidyRevokedAssocs: config.IssuerAssocs, + tidyExpiredIssuers: config.ExpiredIssuers, + pauseDuration: config.PauseDuration.String(), state: tidyStatusStarted, timeStarted: time.Now(), diff --git a/builtin/logical/pki/path_tidy_test.go b/builtin/logical/pki/path_tidy_test.go index be950f865..f00def8cc 100644 --- a/builtin/logical/pki/path_tidy_test.go +++ b/builtin/logical/pki/path_tidy_test.go @@ -264,3 +264,110 @@ func TestTidyCancellation(t *testing.T) { t.Fatalf("expected to only process at most 3 more certificates, but processed (%v >>> %v) certs", nowMany, howMany) } } + +func TestTidyIssuers(t *testing.T) { + t.Parallel() + + b, s := createBackendWithStorage(t) + + // Create a root that expires quickly and one valid for longer. + _, err := CBWrite(b, s, "root/generate/internal", map[string]interface{}{ + "common_name": "root1 example.com", + "issuer_name": "root-expired", + "ttl": "1s", + "key_type": "ec", + }) + require.NoError(t, err) + + _, err = CBWrite(b, s, "root/generate/internal", map[string]interface{}{ + "common_name": "root2 example.com", + "issuer_name": "root-valid", + "ttl": "60m", + "key_type": "rsa", + }) + require.NoError(t, err) + + // Sleep long enough to expire the root. + time.Sleep(2 * time.Second) + + // First tidy run shouldn't remove anything; too long of safety buffer. + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_expired_issuers": true, + "issuer_safety_buffer": "60m", + }) + require.NoError(t, err) + + // Wait for tidy to finish. + time.Sleep(2 * time.Second) + + // Expired issuer should exist. + resp, err := CBRead(b, s, "issuer/root-expired") + requireSuccessNonNilResponse(t, resp, err, "expired should still be present") + resp, err = CBRead(b, s, "issuer/root-valid") + requireSuccessNonNilResponse(t, resp, err, "valid should still be present") + + // Second tidy run with shorter safety buffer shouldn't remove the + // expired one, as it should be the default issuer. + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_expired_issuers": true, + "issuer_safety_buffer": "1s", + }) + require.NoError(t, err) + + // Wait for tidy to finish. + time.Sleep(2 * time.Second) + + // Expired issuer should still exist. + resp, err = CBRead(b, s, "issuer/root-expired") + requireSuccessNonNilResponse(t, resp, err, "expired should still be present") + resp, err = CBRead(b, s, "issuer/root-valid") + requireSuccessNonNilResponse(t, resp, err, "valid should still be present") + + // Update the default issuer. + _, err = CBWrite(b, s, "config/issuers", map[string]interface{}{ + "default": "root-valid", + }) + require.NoError(t, err) + + // Third tidy run should remove the expired one. + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_expired_issuers": true, + "issuer_safety_buffer": "1s", + }) + require.NoError(t, err) + + // Wait for tidy to finish. + time.Sleep(2 * time.Second) + + // Valid issuer should exist still; other should be removed. + resp, err = CBRead(b, s, "issuer/root-expired") + require.Error(t, err) + require.Nil(t, resp) + resp, err = CBRead(b, s, "issuer/root-valid") + requireSuccessNonNilResponse(t, resp, err, "valid should still be present") + + // Finally, one more tidy should cause no changes. + _, err = CBWrite(b, s, "tidy", map[string]interface{}{ + "tidy_expired_issuers": true, + "issuer_safety_buffer": "1s", + }) + require.NoError(t, err) + + // Wait for tidy to finish. + time.Sleep(2 * time.Second) + + // Valid issuer should exist still; other should be removed. + resp, err = CBRead(b, s, "issuer/root-expired") + require.Error(t, err) + require.Nil(t, resp) + resp, err = CBRead(b, s, "issuer/root-valid") + requireSuccessNonNilResponse(t, resp, err, "valid should still be present") + + // Ensure we have safety buffer and expired issuers set correctly. + statusResp, err := CBRead(b, s, "tidy-status") + require.NoError(t, err) + require.NotNil(t, statusResp) + require.NotNil(t, statusResp.Data) + require.Equal(t, statusResp.Data["issuer_safety_buffer"], 1) + require.Equal(t, statusResp.Data["tidy_expired_issuers"], true) +} diff --git a/builtin/logical/pki/storage.go b/builtin/logical/pki/storage.go index fc1c83e32..e55ea4121 100644 --- a/builtin/logical/pki/storage.go +++ b/builtin/logical/pki/storage.go @@ -1222,6 +1222,10 @@ func (sc *storageContext) getAutoTidyConfig() (*tidyConfig, error) { return nil, err } + if result.IssuerSafetyBuffer == 0 { + result.IssuerSafetyBuffer = defaultTidyConfig.IssuerSafetyBuffer + } + return &result, nil } diff --git a/changelog/17823.txt b/changelog/17823.txt new file mode 100644 index 000000000..d999c8794 --- /dev/null +++ b/changelog/17823.txt @@ -0,0 +1,3 @@ +```release-note:improvement +secrets/pki: Allow tidying of expired issuer certificates. +``` diff --git a/website/content/api-docs/secret/pki.mdx b/website/content/api-docs/secret/pki.mdx index e937bbd08..a8851c73c 100644 --- a/website/content/api-docs/secret/pki.mdx +++ b/website/content/api-docs/secret/pki.mdx @@ -3325,6 +3325,15 @@ expiration time. performance of OCSP and CRL building, by shifting work to a tidy operation instead. +- `tidy_expired_issuers` `(bool: false)` - Set to true to automatically remove + expired issuers after the `issuer_safety_buffer` duration has elapsed. We + log the issuer certificate on removal to allow recovery; no keys are removed + during this process. + +~> Note: When the default issuer expires and is tidied, a new default must be + chosen manually by the operator; this process will not select a new one + automatically. + - `safety_buffer` `(string: "")` - Specifies a duration using [duration format strings](/docs/concepts/duration-format) used as a safety buffer to ensure certificates are not expunged prematurely; as an example, this can keep certificates from being removed from the CRL that, due to clock skew, might @@ -3332,6 +3341,10 @@ expiration time. the time must be after the expiration time of the certificate (according to the local clock) plus the duration of `safety_buffer`. Defaults to `72h`. +- `issuer_safety_buffer` `(string: "")` - Specifies a duration that issuers + should be kept for, past their `NotAfter` validity period. Defaults to + 365 days as hours (`8760h`). + - `pause_duration` `(string: "0s")` - Specifies the duration to pause between tidying individual certificates. This releases the revocation lock and allows other operations to continue while tidy is running. @@ -3340,6 +3353,8 @@ expiration time. between reading, parsing, and updates on-disk cert entries will be increased, decreasing resource utilization. + Does not affect `tidy_expired_issuers`. + ~> Note: Using too long of a `pause_duration` can result in tidy operations not concluding during this lifetime! Using too short of a pause duration (but non-zero) can lead to lock contention. Use [tidy's cancellation](#cancel-tidy)