diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go index 71c23e4b8..71cbd7b6f 100644 --- a/builtin/logical/pki/backend.go +++ b/builtin/logical/pki/backend.go @@ -132,6 +132,7 @@ func Backend(conf *logical.BackendConfig) *backend { pathRevokeWithKey(&b), pathTidy(&b), pathTidyStatus(&b), + pathConfigAutoTidy(&b), // Issuer APIs pathListIssuers(&b), @@ -191,6 +192,9 @@ func Backend(conf *logical.BackendConfig) *backend { b.crlBuilder = newCRLBuilder() + // Delay the first tidy until after we've started up. + b.lastTidy = time.Now() + return &b } @@ -204,6 +208,7 @@ type backend struct { tidyStatusLock sync.RWMutex tidyStatus *tidyStatus + lastTidy time.Time pkiStorageVersion atomic.Value crlBuilder *crlBuilder @@ -405,34 +410,105 @@ func (b *backend) invalidate(ctx context.Context, key string) { } func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) error { - // First attempt to reload the CRL configuration. sc := b.makeStorageContext(ctx, request.Storage) - if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil { - return err - } - // As we're (below) modifying the backing storage, we need to ensure - // we're not on a standby/secondary node. - if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) || - b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) { + doCRL := func() error { + // First attempt to reload the CRL configuration. + if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil { + return err + } + + // As we're (below) modifying the backing storage, we need to ensure + // we're not on a standby/secondary node. + if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) || + b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) { + return nil + } + + // Check if we're set to auto rebuild and a CRL is set to expire. + if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil { + return err + } + + // Then attempt to rebuild the CRLs if required. + if err := b.crlBuilder.rebuildIfForced(ctx, b, request); err != nil { + return err + } + + // If a delta CRL was rebuilt above as part of the complete CRL rebuild, + // this will be a no-op. However, if we do need to rebuild delta CRLs, + // this would cause us to do so. + if err := b.crlBuilder.rebuildDeltaCRLsIfForced(sc); err != nil { + return err + } + return nil } - // Check if we're set to auto rebuild and a CRL is set to expire. - if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil { - return err + doAutoTidy := func() error { + // As we're (below) modifying the backing storage, we need to ensure + // we're not on a standby/secondary node. + if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) || + b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) { + return nil + } + + config, err := sc.getAutoTidyConfig() + if err != nil { + return err + } + + if !config.Enabled || config.Interval <= 0*time.Second { + return nil + } + + // Check if we should run another tidy... + now := time.Now() + b.tidyStatusLock.RLock() + nextOp := b.lastTidy.Add(config.Interval) + b.tidyStatusLock.RUnlock() + if now.Before(nextOp) { + return nil + } + + // Ensure a tidy isn't already running... If it is, we'll trigger + // again when the running one finishes. + if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) { + return nil + } + + // Prevent ourselves from starting another tidy operation while + // this one is still running. This operation runs in the background + // and has a separate error reporting mechanism. + b.tidyStatusLock.Lock() + b.lastTidy = now + b.tidyStatusLock.Unlock() + + // Because the request from the parent storage will be cleared at + // some point (and potentially reused) -- due to tidy executing in + // a background goroutine -- we need to copy the storage entry off + // of the backend instead. + backendReq := &logical.Request{ + Storage: b.storage, + } + + b.startTidyOperation(backendReq, config) + return nil } - // Then attempt to rebuild the CRLs if required. - if err := b.crlBuilder.rebuildIfForced(ctx, b, request); err != nil { - return err + crlErr := doCRL() + tidyErr := doAutoTidy() + + if crlErr != nil && tidyErr != nil { + return fmt.Errorf("Error building CRLs:\n - %v\n\nError running auto-tidy:\n - %v\n", crlErr, tidyErr) } - // If a delta CRL was rebuilt above as part of the complete CRL rebuild, - // this will be a no-op. However, if we do need to rebuild delta CRLs, - // this would cause us to do so. - if err := b.crlBuilder.rebuildDeltaCRLsIfForced(sc); err != nil { - return err + if crlErr != nil { + return fmt.Errorf("Error building CRLs:\n - %v\n", crlErr) + } + + if tidyErr != nil { + return fmt.Errorf("Error running auto-tidy:\n - %v\n", tidyErr) } // Check if the CRL was invalidated due to issuer swap and update diff --git a/builtin/logical/pki/crl_test.go b/builtin/logical/pki/crl_test.go index 44ab58b40..e918f4eb2 100644 --- a/builtin/logical/pki/crl_test.go +++ b/builtin/logical/pki/crl_test.go @@ -876,14 +876,11 @@ func TestAutoRebuild(t *testing.T) { // storage without barrier encryption. EnableRaw: true, } - oldPeriod := vault.SetRollbackPeriodForTesting(newPeriod) - cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{ + cluster := vault.CreateTestClusterWithRollbackPeriod(t, newPeriod, coreConfig, &vault.TestClusterOptions{ HandlerFunc: vaulthttp.Handler, }) - cluster.Start() defer cluster.Cleanup() client := cluster.Cores[0].Client - vault.SetRollbackPeriodForTesting(oldPeriod) // Mount PKI err := client.Sys().Mount("pki", &api.MountInput{ diff --git a/builtin/logical/pki/fields.go b/builtin/logical/pki/fields.go index 3b6ed9773..f8343b5b5 100644 --- a/builtin/logical/pki/fields.go +++ b/builtin/logical/pki/fields.go @@ -416,3 +416,41 @@ to the key.`, } return fields } + +func addTidyFields(fields map[string]*framework.FieldSchema) map[string]*framework.FieldSchema { + fields["tidy_cert_store"] = &framework.FieldSchema{ + Type: framework.TypeBool, + Description: `Set to true to enable tidying up +the certificate store`, + } + + fields["tidy_revocation_list"] = &framework.FieldSchema{ + Type: framework.TypeBool, + Description: `Deprecated; synonym for 'tidy_revoked_certs`, + } + + fields["tidy_revoked_certs"] = &framework.FieldSchema{ + Type: framework.TypeBool, + Description: `Set to true to expire all revoked +and expired certificates, removing them both from the CRL and from storage. The +CRL will be rotated if this causes any values to be removed.`, + } + + fields["tidy_revoked_cert_issuer_associations"] = &framework.FieldSchema{ + Type: framework.TypeBool, + Description: `Set to true to validate issuer associations +on revocation entries. This helps increase the performance of CRL building +and OCSP responses.`, + } + + fields["safety_buffer"] = &framework.FieldSchema{ + Type: framework.TypeDurationSecond, + Description: `The amount of extra time that must have passed +beyond certificate expiration before it is removed +from the backend storage and/or revocation list. +Defaults to 72 hours.`, + Default: 259200, // 72h, but TypeDurationSecond currently requires defaults to be int + } + + return fields +} diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go index 07027f090..583640ec6 100644 --- a/builtin/logical/pki/path_tidy.go +++ b/builtin/logical/pki/path_tidy.go @@ -17,58 +17,33 @@ import ( ) type tidyConfig struct { + Enabled bool `json:"enabled"` + Interval time.Duration `json:"interval_duration"` CertStore bool `json:"tidy_cert_store"` RevokedCerts bool `json:"tidy_revoked_certs"` IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"` SafetyBuffer time.Duration `json:"safety_buffer"` } +var defaultTidyConfig = tidyConfig{ + Enabled: false, + Interval: 12 * time.Hour, + CertStore: false, + RevokedCerts: false, + IssuerAssocs: false, + SafetyBuffer: 259200 * time.Second, +} + func pathTidy(b *backend) *framework.Path { return &framework.Path{ Pattern: "tidy$", - Fields: map[string]*framework.FieldSchema{ - "tidy_cert_store": { - Type: framework.TypeBool, - Description: `Set to true to enable tidying up -the certificate store`, - }, - - "tidy_revocation_list": { - Type: framework.TypeBool, - Description: `Deprecated; synonym for 'tidy_revoked_certs`, - }, - - "tidy_revoked_certs": { - Type: framework.TypeBool, - Description: `Set to true to expire all revoked -and expired certificates, removing them both from the CRL and from storage. The -CRL will be rotated if this causes any values to be removed.`, - }, - - "tidy_revoked_cert_issuer_associations": { - Type: framework.TypeBool, - Description: `Set to true to validate issuer associations -on revocation entries. This helps increase the performance of CRL building -and OCSP responses.`, - }, - - "safety_buffer": { - Type: framework.TypeDurationSecond, - Description: `The amount of extra time that must have passed -beyond certificate expiration before it is removed -from the backend storage and/or revocation list. -Defaults to 72 hours.`, - Default: 259200, // 72h, but TypeDurationSecond currently requires defaults to be int - }, - }, - + Fields: addTidyFields(map[string]*framework.FieldSchema{}), Operations: map[logical.Operation]framework.OperationHandler{ logical.UpdateOperation: &framework.PathOperation{ Callback: b.pathTidyWrite, ForwardPerformanceStandby: true, }, }, - HelpSynopsis: pathTidyHelpSyn, HelpDescription: pathTidyHelpDesc, } @@ -88,6 +63,36 @@ func pathTidyStatus(b *backend) *framework.Path { } } +func pathConfigAutoTidy(b *backend) *framework.Path { + return &framework.Path{ + Pattern: "config/auto-tidy", + Fields: addTidyFields(map[string]*framework.FieldSchema{ + "enabled": { + Type: framework.TypeBool, + Description: `Set to true to enable automatic tidy operations.`, + }, + "interval_duration": { + Type: framework.TypeDurationSecond, + Description: `Interval at which to run an auto-tidy operation. This is the time between tidy invocations (after one finishes to the start of the next). Running a manual tidy will reset this duration.`, + Default: 43200, // 32h, but TypeDurationSecond currently requires the default to be an int. + }, + }), + Operations: map[logical.Operation]framework.OperationHandler{ + logical.ReadOperation: &framework.PathOperation{ + Callback: b.pathConfigAutoTidyRead, + }, + logical.UpdateOperation: &framework.PathOperation{ + Callback: b.pathConfigAutoTidyWrite, + // Read more about why these flags are set in backend.go. + ForwardPerformanceStandby: true, + ForwardPerformanceSecondary: true, + }, + }, + HelpSynopsis: pathConfigAutoTidySyn, + HelpDescription: pathConfigAutoTidyDesc, + } +} + func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { safetyBuffer := d.Get("safety_buffer").(int) tidyCertStore := d.Get("tidy_cert_store").(bool) @@ -100,7 +105,10 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr bufferDuration := time.Duration(safetyBuffer) * time.Second + // Manual run with constructed configuration. config := &tidyConfig{ + Enabled: true, + Interval: 0 * time.Second, CertStore: tidyCertStore, RevokedCerts: tidyRevokedCerts, IssuerAssocs: tidyRevokedAssocs, @@ -119,6 +127,13 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr Storage: req.Storage, } + // Mark the last tidy operation as relatively recent, to ensure we don't + // try to trigger the periodic function. + b.tidyStatusLock.Lock() + b.lastTidy = time.Now() + b.tidyStatusLock.Unlock() + + // Kick off the actual tidy. b.startTidyOperation(req, config) resp := &logical.Response{} @@ -163,6 +178,13 @@ func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) { b.tidyStatusStop(err) } else { b.tidyStatusStop(nil) + + // Since the tidy operation finished without an error, we don't + // really want to start another tidy right away (if the interval + // is too short). So mark the last tidy as now. + b.tidyStatusLock.Lock() + b.lastTidy = time.Now() + b.tidyStatusLock.Unlock() } }() } @@ -215,7 +237,9 @@ func (b *backend) doTidyCertStore(ctx context.Context, req *logical.Request, log } } + b.tidyStatusLock.RLock() metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries_remaining"}, float32(uint(serialCount)-b.tidyStatus.certStoreDeletedCount)) + b.tidyStatusLock.RUnlock() return nil } @@ -329,9 +353,11 @@ func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Reques } } + b.tidyStatusLock.RLock() metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries_remaining"}, float32(uint(revokedSerialsCount)-b.tidyStatus.revokedCertDeletedCount)) metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_entries_incorrect_issuers"}, float32(b.tidyStatus.missingIssuerCertCount)) metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_entries_fixed_issuers"}, float32(fixedIssuers)) + b.tidyStatusLock.RUnlock() if rebuildCRL { // Expired certificates isn't generally an important @@ -411,6 +437,69 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f return resp, nil } +func (b *backend) pathConfigAutoTidyRead(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) { + sc := b.makeStorageContext(ctx, req.Storage) + config, err := sc.getAutoTidyConfig() + if err != nil { + return nil, err + } + + return &logical.Response{ + Data: map[string]interface{}{ + "enabled": config.Enabled, + "interval_duration": int(config.Interval / time.Second), + "tidy_cert_store": config.CertStore, + "tidy_revoked_certs": config.RevokedCerts, + "tidy_revoked_cert_issuer_associations": config.IssuerAssocs, + "safety_buffer": int(config.SafetyBuffer / time.Second), + }, + }, nil +} + +func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + sc := b.makeStorageContext(ctx, req.Storage) + config, err := sc.getAutoTidyConfig() + if err != nil { + return nil, err + } + + if enabledRaw, ok := d.GetOk("enabled"); ok { + config.Enabled = enabledRaw.(bool) + } + + if intervalRaw, ok := d.GetOk("interval_duration"); ok { + config.Interval = time.Duration(intervalRaw.(int)) * time.Second + if config.Interval < 0 { + return logical.ErrorResponse(fmt.Sprintf("given interval_duration must be greater than or equal to zero seconds; got: %v", intervalRaw)), nil + } + } + + if certStoreRaw, ok := d.GetOk("tidy_cert_store"); ok { + config.CertStore = certStoreRaw.(bool) + } + + if revokedCertsRaw, ok := d.GetOk("tidy_revoked_certs"); ok { + config.RevokedCerts = revokedCertsRaw.(bool) + } + + if issuerAssocRaw, ok := d.GetOk("tidy_revoked_cert_issuer_associations"); ok { + config.IssuerAssocs = issuerAssocRaw.(bool) + } + + if safetyBufferRaw, ok := d.GetOk("safety_buffer"); ok { + config.SafetyBuffer = time.Duration(safetyBufferRaw.(int)) * time.Second + if config.SafetyBuffer < 1*time.Second { + return logical.ErrorResponse(fmt.Sprintf("given safety_buffer must be greater than zero seconds; got: %v", safetyBufferRaw)), nil + } + } + + if config.Enabled && !(config.CertStore || config.RevokedCerts || config.IssuerAssocs) { + return logical.ErrorResponse(fmt.Sprintf("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations).")), nil + } + + return nil, sc.writeAutoTidyConfig(config) +} + func (b *backend) tidyStatusStart(config *tidyConfig) { b.tidyStatusLock.Lock() defer b.tidyStatusLock.Unlock() @@ -529,3 +618,7 @@ The result includes the following fields: * 'revoked_cert_deleted_count': The number of revoked certificate entries deleted * 'missing_issuer_cert_count': The number of revoked certificates which were missing a valid issuer reference ` + +const pathConfigAutoTidySyn = `` + +const pathConfigAutoTidyDesc = `` diff --git a/builtin/logical/pki/path_tidy_test.go b/builtin/logical/pki/path_tidy_test.go new file mode 100644 index 000000000..2db8f6c4f --- /dev/null +++ b/builtin/logical/pki/path_tidy_test.go @@ -0,0 +1,157 @@ +package pki + +import ( + "testing" + "time" + + "github.com/hashicorp/vault/api" + vaulthttp "github.com/hashicorp/vault/http" + "github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault" + + "github.com/stretchr/testify/require" +) + +func TestAutoTidy(t *testing.T) { + t.Parallel() + + // While we'd like to reduce this duration, we need to wait until + // the rollback manager timer ticks. With the new helper, we can + // modify the rollback manager timer period directly, allowing us + // to shorten the total test time significantly. + // + // We set the delta CRL time to ensure it executes prior to the + // main CRL rebuild, and the new CRL doesn't rebuild until after + // we're done. + newPeriod := 1 * time.Second + + // This test requires the periodicFunc to trigger, which requires we stand + // up a full test cluster. + coreConfig := &vault.CoreConfig{ + LogicalBackends: map[string]logical.Factory{ + "pki": Factory, + }, + // See notes below about usage of /sys/raw for reading cluster + // storage without barrier encryption. + EnableRaw: true, + } + cluster := vault.CreateTestClusterWithRollbackPeriod(t, newPeriod, coreConfig, &vault.TestClusterOptions{ + HandlerFunc: vaulthttp.Handler, + }) + defer cluster.Cleanup() + client := cluster.Cores[0].Client + + // Mount PKI + err := client.Sys().Mount("pki", &api.MountInput{ + Type: "pki", + Config: api.MountConfigInput{ + DefaultLeaseTTL: "10m", + MaxLeaseTTL: "60m", + }, + }) + require.NoError(t, err) + + // Generate root. + resp, err := client.Logical().Write("pki/root/generate/internal", map[string]interface{}{ + "ttl": "40h", + "common_name": "Root X1", + "key_type": "ec", + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotEmpty(t, resp.Data) + require.NotEmpty(t, resp.Data["issuer_id"]) + + // Run tidy so status is not empty when we run it later... + _, err = client.Logical().Write("pki/tidy", map[string]interface{}{ + "tidy_revoked_certs": true, + }) + require.NoError(t, err) + + // Setup a testing role. + _, err = client.Logical().Write("pki/roles/local-testing", map[string]interface{}{ + "allow_any_name": true, + "enforce_hostnames": false, + "key_type": "ec", + }) + require.NoError(t, err) + + // Write the auto-tidy config. + _, err = client.Logical().Write("pki/config/auto-tidy", map[string]interface{}{ + "enabled": true, + "interval_duration": "1s", + "tidy_cert_store": true, + "tidy_revoked_certs": true, + "safety_buffer": "1s", + }) + require.NoError(t, err) + + // Issue a cert and revoke it. + resp, err = client.Logical().Write("pki/issue/local-testing", map[string]interface{}{ + "common_name": "example.com", + "ttl": "10s", + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotEmpty(t, resp.Data["serial_number"]) + require.NotEmpty(t, resp.Data["certificate"]) + leafSerial := resp.Data["serial_number"].(string) + leafCert := parseCert(t, resp.Data["certificate"].(string)) + + _, err = client.Logical().Write("pki/revoke", map[string]interface{}{ + "serial_number": leafSerial, + }) + require.NoError(t, err) + + // Cert should still exist. + resp, err = client.Logical().Read("pki/cert/" + leafSerial) + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotEmpty(t, resp.Data["certificate"]) + + // Wait for cert to expire and the safety buffer to elapse. + time.Sleep(leafCert.NotAfter.Sub(time.Now()) + 3*time.Second) + + // Wait for auto-tidy to run afterwards. + var foundTidyRunning string + var foundTidyFinished bool + timeoutChan := time.After(120 * time.Second) + for { + if foundTidyRunning != "" && foundTidyFinished { + break + } + + select { + case <-timeoutChan: + t.Fatalf("expected auto-tidy to run (%v) and finish (%v) before 120 seconds elapsed", foundTidyRunning, foundTidyFinished) + default: + time.Sleep(250 * time.Millisecond) + + resp, err = client.Logical().Read("pki/tidy-status") + require.NoError(t, err) + require.NotNil(t, resp) + require.NotNil(t, resp.Data) + require.NotEmpty(t, resp.Data["state"]) + require.NotEmpty(t, resp.Data["time_started"]) + state := resp.Data["state"].(string) + started := resp.Data["time_started"].(string) + t.Logf("Resp: %v", resp.Data) + + // We want the _next_ tidy run after the cert expires. This + // means if we're currently finished when we hit this the + // first time, we want to wait for the next run. + if foundTidyRunning == "" { + foundTidyRunning = started + } else if foundTidyRunning != started && !foundTidyFinished && state == "Finished" { + foundTidyFinished = true + } + } + } + + // Cert should no longer exist. + resp, err = client.Logical().Read("pki/cert/" + leafSerial) + require.Nil(t, err) + require.Nil(t, resp) +} diff --git a/builtin/logical/pki/storage.go b/builtin/logical/pki/storage.go index 77b8c61e2..a2a80f954 100644 --- a/builtin/logical/pki/storage.go +++ b/builtin/logical/pki/storage.go @@ -28,6 +28,8 @@ const ( deltaCRLPath = "delta-crl" deltaCRLPathSuffix = "-delta" + autoTidyConfigPath = "config/auto-tidy" + // Used as a quick sanity check for a reference id lookups... uuidLength = 36 @@ -1158,3 +1160,31 @@ func (sc *storageContext) getRevocationConfig() (*crlConfig, error) { return &result, nil } + +func (sc *storageContext) getAutoTidyConfig() (*tidyConfig, error) { + entry, err := sc.Storage.Get(sc.Context, autoTidyConfigPath) + if err != nil { + return nil, err + } + + var result tidyConfig + if entry == nil { + result = defaultTidyConfig + return &result, nil + } + + if err = entry.DecodeJSON(&result); err != nil { + return nil, err + } + + return &result, nil +} + +func (sc *storageContext) writeAutoTidyConfig(config *tidyConfig) error { + entry, err := logical.StorageEntryJSON(autoTidyConfigPath, config) + if err != nil { + return err + } + + return sc.Storage.Put(sc.Context, entry) +} diff --git a/changelog/16900.txt b/changelog/16900.txt new file mode 100644 index 000000000..35e2b5a96 --- /dev/null +++ b/changelog/16900.txt @@ -0,0 +1,3 @@ +```release-note:improvement +secrets/pki: Add ability to periodically run tidy operations to remove expired certificates. +``` diff --git a/vault/rollback.go b/vault/rollback.go index ed37f94ef..8ba0e27b2 100644 --- a/vault/rollback.go +++ b/vault/rollback.go @@ -17,7 +17,7 @@ import ( // rollbackPeriod is how often we attempt rollbacks for all the backends. // // This is turned into a variable to allow test to check behavior without -// waiting the full minute. See SetRollbackPeriodForTesting(...). +// waiting the full minute. See CreateTestClusterWithRollbackPeriod(...). var rollbackPeriod = time.Minute // RollbackManager is responsible for performing rollbacks of partial diff --git a/vault/testing.go b/vault/testing.go index 31bf8e575..9a9b5f5e5 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -2398,13 +2398,29 @@ func RetryUntil(t testing.T, timeout time.Duration, f func() error) { t.Fatalf("did not complete before deadline, err: %v", err) } -// SetRollbackPeriodForTesting lets us modify the periodic func invocation -// time period to some other value. Best practice is to set this, spin up -// a test cluster and immediately reset the value back to the default, to -// avoid impacting other tests too much. To that end, we return the original -// value of that period. -func SetRollbackPeriodForTesting(newPeriod time.Duration) time.Duration { +// CreateTestClusterWithRollbackPeriod lets us modify the periodic func +// invocation time period to some other value. +// +// Because multiple tests in the PKI mount use this helper, we've added +// a lock around it and created the cluster immediately in this helper. +// This ensures the tests don't race against each other. +var rollbackPeriodLock sync.Mutex + +func CreateTestClusterWithRollbackPeriod(t testing.T, newPeriod time.Duration, base *CoreConfig, opts *TestClusterOptions) *TestCluster { + rollbackPeriodLock.Lock() + defer rollbackPeriodLock.Unlock() + + // Set the period oldPeriod := rollbackPeriod + + // Create and start a new cluster. rollbackPeriod = newPeriod - return oldPeriod + cluster := NewTestCluster(t, base, opts) + cluster.Start() + + // Reset the period + rollbackPeriod = oldPeriod + + // Return the cluster. + return cluster } diff --git a/website/content/api-docs/secret/pki.mdx b/website/content/api-docs/secret/pki.mdx index 4fcd29f3c..c43a9ea89 100644 --- a/website/content/api-docs/secret/pki.mdx +++ b/website/content/api-docs/secret/pki.mdx @@ -68,6 +68,7 @@ update your API calls accordingly. - [Set CRL Configuration](#set-crl-configuration) - [Rotate CRLs](#rotate-crls) - [Tidy](#tidy) + - [Configure Automatic Tidy](#configure-automatic-tidy) - [Tidy Status](#tidy-status) - [Cluster Scalability](#cluster-scalability) - [Managed Key](#managed-keys) (Enterprise Only) @@ -3162,6 +3163,9 @@ expiration time. | :----- | :---------- | | `POST` | `/pki/tidy` | +~> Note: it is encouraged to use the [automatic tidy capabilities](#configure-automatic-tidy) + to ensure this gets run periodically. + #### Parameters - `tidy_cert_store` `(bool: false)` - Specifies whether to tidy up the certificate @@ -3204,6 +3208,66 @@ $ curl \ http://127.0.0.1:8200/v1/pki/tidy ``` +### Configure Automatic Tidy + +This endpoint allows configuring periodic tidy operations, using the tidy mechanism +described above. Status is from automatically run tidies are still reported at the +status endpoint described below. + +| Method | Path | +| :----- | :---------------------- | +| `POST` | `/pki/config/auto-tidy` | + +#### Parameters + +- `enabled` `(bool: false)` - Specifies whether automatic tidy is enabled or not. + +- `interval_duration` `(string: "")` - Specifies the duration between automatic tidy + operations; note that this is from the end of one operation to the start of + the next so the time of the operation itself does not need to be considered. + +- `tidy_cert_store` `(bool: false)` - Specifies whether to tidy up the certificate + store. + +- `tidy_revoked_certs` `(bool: false)` - Set to true to remove all invalid and + expired certificates from storage. A revoked storage entry is considered + invalid if the entry is empty, or the value within the entry is empty. If a + certificate is removed due to expiry, the entry will also be removed from the + CRL, and the CRL will be rotated. + +- `tidy_revoked_cert_issuer_associations` `(bool: false)` - Set to true to associate + revoked certificates with their corresponding issuers; this improves the + performance of OCSP and CRL building, by shifting work to a tidy operation + instead. + +- `safety_buffer` `(string: "")` - Specifies a duration using [duration format strings](/docs/concepts/duration-format) + used as a safety buffer to ensure certificates are not expunged prematurely; as an example, this can keep + certificates from being removed from the CRL that, due to clock skew, might + still be considered valid on other hosts. For a certificate to be expunged, + the time must be after the expiration time of the certificate (according to + the local clock) plus the duration of `safety_buffer`. Defaults to `72h`. + + +#### Sample Payload + +```json +{ + "enabled": true, + "tidy_revoked_cert_issuer_associations": true, + "safety_buffer": "24h" +} +``` + +#### Sample Request + +```shell-session +$ curl \ + --header "X-Vault-Token: ..." \ + --request POST \ + --data @payload.json \ + http://127.0.0.1:8200/v1/pki/config/auto-tidy +``` + ### Tidy Status This is a read only endpoint that returns information about the current tidy