Add tidy of cross-cluster revoked storage (#18860)

* Add new tidy operation for cross revoked certs

This operation allows tidying of the cross-cluster revocation storage.

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Fix missing cancels, status values

Previous additions to tidy didn't have enough cancel operations and left
out some new values from the status operation.

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
This commit is contained in:
Alexander Scheel 2023-01-26 13:30:57 -05:00 committed by GitHub
parent fe289a8659
commit ea539070c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 170 additions and 32 deletions

View file

@ -3942,6 +3942,8 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
"tidy_revoked_cert_issuer_associations": false, "tidy_revoked_cert_issuer_associations": false,
"tidy_expired_issuers": false, "tidy_expired_issuers": false,
"tidy_move_legacy_ca_bundle": false, "tidy_move_legacy_ca_bundle": false,
"tidy_revocation_queue": false,
"tidy_cross_cluster_revoked_certs": false,
"pause_duration": "0s", "pause_duration": "0s",
"state": "Finished", "state": "Finished",
"error": nil, "error": nil,
@ -3954,6 +3956,7 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
"current_cert_store_count": json.Number("0"), "current_cert_store_count": json.Number("0"),
"current_revoked_cert_count": json.Number("0"), "current_revoked_cert_count": json.Number("0"),
"revocation_queue_deleted_count": json.Number("0"), "revocation_queue_deleted_count": json.Number("0"),
"cross_revoked_cert_deleted_count": json.Number("0"),
} }
// Let's copy the times from the response so that we can use deep.Equal() // Let's copy the times from the response so that we can use deep.Equal()
timeStarted, ok := tidyStatus.Data["time_started"] timeStarted, ok := tidyStatus.Data["time_started"]

View file

@ -536,5 +536,12 @@ especially if the cluster is offline.`,
Default: int(defaultTidyConfig.QueueSafetyBuffer / time.Second), // TypeDurationSecond currently requires defaults to be int Default: int(defaultTidyConfig.QueueSafetyBuffer / time.Second), // TypeDurationSecond currently requires defaults to be int
} }
fields["tidy_cross_cluster_revoked_certs"] = &framework.FieldSchema{
Type: framework.TypeBool,
Description: `Set to true to enable tidying up
the cross-cluster revoked certificate store. Only runs on the active
primary node.`,
}
return fields return fields
} }

View file

@ -39,6 +39,8 @@ type tidyStatus struct {
tidyRevokedAssocs bool tidyRevokedAssocs bool
tidyExpiredIssuers bool tidyExpiredIssuers bool
tidyBackupBundle bool tidyBackupBundle bool
tidyRevocationQueue bool
tidyCrossRevokedCerts bool
pauseDuration string pauseDuration string
// Status // Status
@ -47,10 +49,14 @@ type tidyStatus struct {
timeStarted time.Time timeStarted time.Time
timeFinished time.Time timeFinished time.Time
message string message string
// These counts use a custom incrementer that grab and release
// a lock prior to reading.
certStoreDeletedCount uint certStoreDeletedCount uint
revokedCertDeletedCount uint revokedCertDeletedCount uint
missingIssuerCertCount uint missingIssuerCertCount uint
revQueueDeletedCount uint revQueueDeletedCount uint
crossRevokedDeletedCount uint
} }
type tidyConfig struct { type tidyConfig struct {
@ -66,6 +72,7 @@ type tidyConfig struct {
PauseDuration time.Duration `json:"pause_duration"` PauseDuration time.Duration `json:"pause_duration"`
RevocationQueue bool `json:"tidy_revocation_queue"` RevocationQueue bool `json:"tidy_revocation_queue"`
QueueSafetyBuffer time.Duration `json:"revocation_queue_safety_buffer"` QueueSafetyBuffer time.Duration `json:"revocation_queue_safety_buffer"`
CrossRevokedCerts bool `json:"tidy_cross_cluster_revoked_certs"`
} }
var defaultTidyConfig = tidyConfig{ var defaultTidyConfig = tidyConfig{
@ -81,6 +88,7 @@ var defaultTidyConfig = tidyConfig{
PauseDuration: 0 * time.Second, PauseDuration: 0 * time.Second,
RevocationQueue: false, RevocationQueue: false,
QueueSafetyBuffer: 48 * time.Hour, QueueSafetyBuffer: 48 * time.Hour,
CrossRevokedCerts: false,
} }
func pathTidy(b *backend) *framework.Path { func pathTidy(b *backend) *framework.Path {
@ -168,6 +176,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
pauseDuration := 0 * time.Second pauseDuration := 0 * time.Second
tidyRevocationQueue := d.Get("tidy_revocation_queue").(bool) tidyRevocationQueue := d.Get("tidy_revocation_queue").(bool)
queueSafetyBuffer := d.Get("revocation_queue_safety_buffer").(int) queueSafetyBuffer := d.Get("revocation_queue_safety_buffer").(int)
tidyCrossRevokedCerts := d.Get("tidy_cross_cluster_revoked_certs").(bool)
if safetyBuffer < 1 { if safetyBuffer < 1 {
return logical.ErrorResponse("safety_buffer must be greater than zero"), nil return logical.ErrorResponse("safety_buffer must be greater than zero"), nil
@ -211,6 +220,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
PauseDuration: pauseDuration, PauseDuration: pauseDuration,
RevocationQueue: tidyRevocationQueue, RevocationQueue: tidyRevocationQueue,
QueueSafetyBuffer: queueSafetyBufferDuration, QueueSafetyBuffer: queueSafetyBufferDuration,
CrossRevokedCerts: tidyCrossRevokedCerts,
} }
if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) { if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) {
@ -235,17 +245,17 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
b.startTidyOperation(req, config) b.startTidyOperation(req, config)
resp := &logical.Response{} resp := &logical.Response{}
if !tidyCertStore && !tidyRevokedCerts && !tidyRevokedAssocs && !tidyExpiredIssuers && !tidyBackupBundle && !tidyRevocationQueue { if !tidyCertStore && !tidyRevokedCerts && !tidyRevokedAssocs && !tidyExpiredIssuers && !tidyBackupBundle && !tidyRevocationQueue && !tidyCrossRevokedCerts {
resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true or tidy_revoked_cert_issuer_associations=true or tidy_expired_issuers=true or tidy_move_legacy_ca_bundle=true or tidy_revocation_queue=true to start a tidy operation.") resp.AddWarning("No targets to tidy; specify tidy_cert_store=true or tidy_revoked_certs=true or tidy_revoked_cert_issuer_associations=true or tidy_expired_issuers=true or tidy_move_legacy_ca_bundle=true or tidy_revocation_queue=true or tidy_cross_cluster_revoked_certs=true to start a tidy operation.")
} else { } else {
resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.") resp.AddWarning("Tidy operation successfully started. Any information from the operation will be printed to Vault's server logs.")
} }
if tidyRevocationQueue { if tidyRevocationQueue || tidyCrossRevokedCerts {
isNotPerfPrimary := b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) || isNotPerfPrimary := b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
(!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) (!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary))
if isNotPerfPrimary { if isNotPerfPrimary {
resp.AddWarning("tidy_revocation_queue=true can only be set on the active node of the primary cluster unless a local mount is used; this option has been ignored.") resp.AddWarning("tidy_revocation_queue=true and tidy_cross_cluster_revoked_certs=true can only be set on the active node of the primary cluster unless a local mount is used; this option has been ignored.")
} }
} }
@ -315,6 +325,17 @@ func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) {
} }
} }
// Check for cancel before continuing.
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
return tidyCancelledError
}
if config.CrossRevokedCerts {
if err := b.doTidyCrossRevocationStore(ctx, req, logger, config); err != nil {
return err
}
}
return nil return nil
} }
@ -547,6 +568,9 @@ func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Reques
} }
func (b *backend) doTidyExpiredIssuers(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error { func (b *backend) doTidyExpiredIssuers(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error {
// We do not support cancelling within the expired issuers operation.
// Any cancellation will occur before or after this operation.
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) || if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
(!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) { (!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) {
b.Logger().Debug("skipping expired issuer tidy as we're not on the primary or secondary with a local mount") b.Logger().Debug("skipping expired issuer tidy as we're not on the primary or secondary with a local mount")
@ -647,6 +671,9 @@ func (b *backend) doTidyExpiredIssuers(ctx context.Context, req *logical.Request
} }
func (b *backend) doTidyMoveCABundle(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error { func (b *backend) doTidyMoveCABundle(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error {
// We do not support cancelling within this operation; any cancel will
// occur before or after this operation.
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) || if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
(!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) { (!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) {
b.Logger().Debug("skipping moving the legacy CA bundle as we're not on the primary or secondary with a local mount") b.Logger().Debug("skipping moving the legacy CA bundle as we're not on the primary or secondary with a local mount")
@ -733,6 +760,11 @@ func (b *backend) doTidyRevocationQueue(ctx context.Context, req *logical.Reques
} }
for _, serial := range serials { for _, serial := range serials {
// Check for cancellation.
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
return tidyCancelledError
}
// Check for pause duration to reduce resource consumption. // Check for pause duration to reduce resource consumption.
if config.PauseDuration > (0 * time.Second) { if config.PauseDuration > (0 * time.Second) {
b.revokeStorageLock.Unlock() b.revokeStorageLock.Unlock()
@ -793,7 +825,7 @@ func (b *backend) doTidyRevocationQueue(ctx context.Context, req *logical.Reques
return fmt.Errorf("error reading revocation request (%v) to tidy: %w", ePath, err) return fmt.Errorf("error reading revocation request (%v) to tidy: %w", ePath, err)
} }
if time.Since(revRequest.RequestedAt) > config.QueueSafetyBuffer { if time.Since(revRequest.RequestedAt) <= config.QueueSafetyBuffer {
continue continue
} }
@ -821,6 +853,77 @@ func (b *backend) doTidyRevocationQueue(ctx context.Context, req *logical.Reques
return nil return nil
} }
func (b *backend) doTidyCrossRevocationStore(ctx context.Context, req *logical.Request, logger hclog.Logger, config *tidyConfig) error {
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
(!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) {
b.Logger().Debug("skipping cross-cluster revoked certificate store tidy as we're not on the primary or secondary with a local mount")
return nil
}
sc := b.makeStorageContext(ctx, req.Storage)
clusters, err := sc.Storage.List(sc.Context, unifiedRevocationReadPathPrefix)
if err != nil {
return fmt.Errorf("failed to list cross-cluster revoked certificate store participating clusters: %w", err)
}
// Grab locks as we're potentially modifying revocation-related storage.
b.revokeStorageLock.Lock()
defer b.revokeStorageLock.Unlock()
for cIndex, cluster := range clusters {
if cluster[len(cluster)-1] == '/' {
cluster = cluster[0 : len(cluster)-1]
}
cPath := unifiedRevocationReadPathPrefix + cluster + "/"
serials, err := sc.Storage.List(sc.Context, cPath)
if err != nil {
return fmt.Errorf("failed to list cross-cluster revoked certificate store entries for cluster %v (%v): %w", cluster, cIndex, err)
}
for _, serial := range serials {
// Check for cancellation.
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
return tidyCancelledError
}
// Check for pause duration to reduce resource consumption.
if config.PauseDuration > (0 * time.Second) {
b.revokeStorageLock.Unlock()
time.Sleep(config.PauseDuration)
b.revokeStorageLock.Lock()
}
ePath := cPath + serial
entry, err := sc.Storage.Get(sc.Context, ePath)
if err != nil {
return fmt.Errorf("error reading cross-cluster revocation entry (%v) to tidy: %w", ePath, err)
}
if entry == nil || entry.Value == nil {
continue
}
var details unifiedRevocationEntry
if err := entry.DecodeJSON(&details); err != nil {
return fmt.Errorf("error decoding cross-cluster revocation entry (%v) to tidy: %w", ePath, err)
}
if time.Since(details.CertExpiration) <= config.SafetyBuffer {
continue
}
// Safe to remove this entry.
if err := sc.Storage.Delete(sc.Context, ePath); err != nil {
return fmt.Errorf("error deleting revocation request (%v): %w", ePath, err)
}
b.tidyStatusIncCrossRevCertCount()
}
}
return nil
}
func (b *backend) pathTidyCancelWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { func (b *backend) pathTidyCancelWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
if atomic.LoadUint32(b.tidyCASGuard) == 0 { if atomic.LoadUint32(b.tidyCASGuard) == 0 {
resp := &logical.Response{} resp := &logical.Response{}
@ -857,6 +960,8 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
"tidy_revoked_cert_issuer_associations": nil, "tidy_revoked_cert_issuer_associations": nil,
"tidy_expired_issuers": nil, "tidy_expired_issuers": nil,
"tidy_move_legacy_ca_bundle": nil, "tidy_move_legacy_ca_bundle": nil,
"tidy_revocation_queue": nil,
"tidy_cross_cluster_revoked_certs": nil,
"pause_duration": nil, "pause_duration": nil,
"state": "Inactive", "state": "Inactive",
"error": nil, "error": nil,
@ -869,6 +974,7 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
"current_cert_store_count": nil, "current_cert_store_count": nil,
"current_revoked_cert_count": nil, "current_revoked_cert_count": nil,
"revocation_queue_deleted_count": nil, "revocation_queue_deleted_count": nil,
"cross_revoked_cert_deleted_count": nil,
}, },
} }
@ -883,6 +989,8 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
resp.Data["tidy_revoked_cert_issuer_associations"] = b.tidyStatus.tidyRevokedAssocs resp.Data["tidy_revoked_cert_issuer_associations"] = b.tidyStatus.tidyRevokedAssocs
resp.Data["tidy_expired_issuers"] = b.tidyStatus.tidyExpiredIssuers resp.Data["tidy_expired_issuers"] = b.tidyStatus.tidyExpiredIssuers
resp.Data["tidy_move_legacy_ca_bundle"] = b.tidyStatus.tidyBackupBundle resp.Data["tidy_move_legacy_ca_bundle"] = b.tidyStatus.tidyBackupBundle
resp.Data["tidy_revocation_queue"] = b.tidyStatus.tidyRevocationQueue
resp.Data["tidy_cross_cluster_revoked_certs"] = b.tidyStatus.tidyCrossRevokedCerts
resp.Data["pause_duration"] = b.tidyStatus.pauseDuration resp.Data["pause_duration"] = b.tidyStatus.pauseDuration
resp.Data["time_started"] = b.tidyStatus.timeStarted resp.Data["time_started"] = b.tidyStatus.timeStarted
resp.Data["message"] = b.tidyStatus.message resp.Data["message"] = b.tidyStatus.message
@ -890,6 +998,7 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
resp.Data["revoked_cert_deleted_count"] = b.tidyStatus.revokedCertDeletedCount resp.Data["revoked_cert_deleted_count"] = b.tidyStatus.revokedCertDeletedCount
resp.Data["missing_issuer_cert_count"] = b.tidyStatus.missingIssuerCertCount resp.Data["missing_issuer_cert_count"] = b.tidyStatus.missingIssuerCertCount
resp.Data["revocation_queue_deleted_count"] = b.tidyStatus.revQueueDeletedCount resp.Data["revocation_queue_deleted_count"] = b.tidyStatus.revQueueDeletedCount
resp.Data["cross_revoked_cert_deleted_count"] = b.tidyStatus.crossRevokedDeletedCount
switch b.tidyStatus.state { switch b.tidyStatus.state {
case tidyStatusStarted: case tidyStatusStarted:
@ -943,6 +1052,7 @@ func (b *backend) pathConfigAutoTidyRead(ctx context.Context, req *logical.Reque
"pause_duration": config.PauseDuration.String(), "pause_duration": config.PauseDuration.String(),
"tidy_revocation_queue": config.RevocationQueue, "tidy_revocation_queue": config.RevocationQueue,
"revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second), "revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second),
"tidy_cross_cluster_revoked_certs": config.CrossRevokedCerts,
}, },
}, nil }, nil
} }
@ -1021,8 +1131,12 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
} }
} }
if config.Enabled && !(config.CertStore || config.RevokedCerts || config.IssuerAssocs || config.ExpiredIssuers || config.BackupBundle || config.RevocationQueue) { if crossRevokedRaw, ok := d.GetOk("tidy_cross_cluster_revoked_certs"); ok {
return logical.ErrorResponse("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations)."), nil config.CrossRevokedCerts = crossRevokedRaw.(bool)
}
if config.Enabled && !(config.CertStore || config.RevokedCerts || config.IssuerAssocs || config.ExpiredIssuers || config.BackupBundle || config.RevocationQueue || config.CrossRevokedCerts) {
return logical.ErrorResponse("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations / tidy_expired_issuers / tidy_move_legacy_ca_bundle / tidy_revocation_queue / tidy_cross_cluster_revoked_certs)."), nil
} }
if err := sc.writeAutoTidyConfig(config); err != nil { if err := sc.writeAutoTidyConfig(config); err != nil {
@ -1043,6 +1157,7 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
"pause_duration": config.PauseDuration.String(), "pause_duration": config.PauseDuration.String(),
"tidy_revocation_queue": config.RevocationQueue, "tidy_revocation_queue": config.RevocationQueue,
"revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second), "revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second),
"tidy_cross_cluster_revoked_certs": config.CrossRevokedCerts,
}, },
}, nil }, nil
} }
@ -1059,6 +1174,8 @@ func (b *backend) tidyStatusStart(config *tidyConfig) {
tidyRevokedAssocs: config.IssuerAssocs, tidyRevokedAssocs: config.IssuerAssocs,
tidyExpiredIssuers: config.ExpiredIssuers, tidyExpiredIssuers: config.ExpiredIssuers,
tidyBackupBundle: config.BackupBundle, tidyBackupBundle: config.BackupBundle,
tidyRevocationQueue: config.RevocationQueue,
tidyCrossRevokedCerts: config.CrossRevokedCerts,
pauseDuration: config.PauseDuration.String(), pauseDuration: config.PauseDuration.String(),
state: tidyStatusStarted, state: tidyStatusStarted,
@ -1133,6 +1250,13 @@ func (b *backend) tidyStatusIncRevQueueCount() {
b.tidyStatus.revQueueDeletedCount++ b.tidyStatus.revQueueDeletedCount++
} }
func (b *backend) tidyStatusIncCrossRevCertCount() {
b.tidyStatusLock.Lock()
defer b.tidyStatusLock.Unlock()
b.tidyStatus.crossRevokedDeletedCount++
}
const pathTidyHelpSyn = ` const pathTidyHelpSyn = `
Tidy up the backend by removing expired certificates, revocation information, Tidy up the backend by removing expired certificates, revocation information,
or both. or both.
@ -1197,6 +1321,10 @@ The result includes the following fields:
* 'tidy_expired_issuers': the value of this parameter when initiating the tidy operation * 'tidy_expired_issuers': the value of this parameter when initiating the tidy operation
* 'issuer_safety_buffer': the value of this parameter when initiating the tidy operation * 'issuer_safety_buffer': the value of this parameter when initiating the tidy operation
* 'tidy_move_legacy_ca_bundle': the value of this parameter when initiating the tidy operation * 'tidy_move_legacy_ca_bundle': the value of this parameter when initiating the tidy operation
* 'tidy_revocation_queue': the value of this parameter when initiating the tidy operation
* 'revocation_queue_deleted_count': the number of revocation queue entries deleted
* 'tidy_cross_cluster_revoked_certs': the value of this parameter when initiating the tidy operation
* 'cross_revoked_cert_deleted_count': the number of cross-cluster revoked certificate entries deleted
` `
const pathConfigAutoTidySyn = ` const pathConfigAutoTidySyn = `