Add ability to cancel PKI tidy operations, pause between tidying certs (#16958)
* Allow tidy operations to be cancelled When tidy operations take a long time to execute (and especially when executing them automatically), having the ability to cancel them becomes useful to reduce strain on Vault clusters (and let them be rescheduled at a later time). To this end, we add the /tidy-cancel write endpoint. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add missing auto-tidy synopsis / description Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add a pause duration between tidying certificates By setting pause_duration, operators can have a little control over the resource utilization of a tidy operation. While the list of certificates remain in memory throughout the entire operation, a pause is added between processing certificates and the revocation lock is released. This allows other operations to occur during this gap and potentially allows the tidy operation to consume less resources per unit of time (due to the sleep -- though obviously consumes the same resources over the time of the operation). Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add tests for cancellation, pause Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add API docs on pause_duration, /tidy-cancel Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add changelog entry Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add lock releasing around tidy pause Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Reset cancel guard, return errors Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
This commit is contained in:
parent
09ad6ab72c
commit
f0a127487b
|
@ -131,6 +131,7 @@ func Backend(conf *logical.BackendConfig) *backend {
|
|||
pathRevoke(&b),
|
||||
pathRevokeWithKey(&b),
|
||||
pathTidy(&b),
|
||||
pathTidyCancel(&b),
|
||||
pathTidyStatus(&b),
|
||||
pathConfigAutoTidy(&b),
|
||||
|
||||
|
@ -184,6 +185,7 @@ func Backend(conf *logical.BackendConfig) *backend {
|
|||
}
|
||||
|
||||
b.tidyCASGuard = new(uint32)
|
||||
b.tidyCancelCAS = new(uint32)
|
||||
b.tidyStatus = &tidyStatus{state: tidyStatusInactive}
|
||||
b.storage = conf.StorageView
|
||||
b.backendUUID = conf.BackendUUID
|
||||
|
@ -205,6 +207,7 @@ type backend struct {
|
|||
storage logical.Storage
|
||||
revokeStorageLock sync.RWMutex
|
||||
tidyCASGuard *uint32
|
||||
tidyCancelCAS *uint32
|
||||
|
||||
tidyStatusLock sync.RWMutex
|
||||
tidyStatus *tidyStatus
|
||||
|
@ -223,10 +226,12 @@ type (
|
|||
)
|
||||
|
||||
const (
|
||||
tidyStatusInactive tidyStatusState = iota
|
||||
tidyStatusStarted
|
||||
tidyStatusFinished
|
||||
tidyStatusError
|
||||
tidyStatusInactive tidyStatusState = iota
|
||||
tidyStatusStarted = iota
|
||||
tidyStatusFinished = iota
|
||||
tidyStatusError = iota
|
||||
tidyStatusCancelling = iota
|
||||
tidyStatusCancelled = iota
|
||||
)
|
||||
|
||||
type tidyStatus struct {
|
||||
|
@ -235,6 +240,7 @@ type tidyStatus struct {
|
|||
tidyCertStore bool
|
||||
tidyRevokedCerts bool
|
||||
tidyRevokedAssocs bool
|
||||
pauseDuration string
|
||||
|
||||
// Status
|
||||
state tidyStatusState
|
||||
|
|
|
@ -3877,6 +3877,7 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
|||
"tidy_cert_store": true,
|
||||
"tidy_revoked_certs": true,
|
||||
"tidy_revoked_cert_issuer_associations": false,
|
||||
"pause_duration": "0s",
|
||||
"state": "Finished",
|
||||
"error": nil,
|
||||
"time_started": nil,
|
||||
|
|
|
@ -463,5 +463,17 @@ Defaults to 72 hours.`,
|
|||
Default: int(defaultTidyConfig.SafetyBuffer / time.Second), // TypeDurationSecond currently requires defaults to be int
|
||||
}
|
||||
|
||||
fields["pause_duration"] = &framework.FieldSchema{
|
||||
Type: framework.TypeString,
|
||||
Description: `The amount of time to wait between processing
|
||||
certificates. This allows operators to change the execution profile
|
||||
of tidy to take consume less resources by slowing down how long it
|
||||
takes to run. Note that the entire list of certificates will be
|
||||
stored in memory during the entire tidy operation, but resources to
|
||||
read/process/update existing entries will be spread out over a
|
||||
greater period of time. By default this is zero seconds.`,
|
||||
Default: "0s",
|
||||
}
|
||||
|
||||
return fields
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package pki
|
|||
import (
|
||||
"context"
|
||||
"crypto/x509"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync/atomic"
|
||||
|
@ -16,22 +17,26 @@ import (
|
|||
"github.com/hashicorp/vault/sdk/logical"
|
||||
)
|
||||
|
||||
var tidyCancelledError = errors.New("tidy operation cancelled")
|
||||
|
||||
type tidyConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Interval time.Duration `json:"interval_duration"`
|
||||
CertStore bool `json:"tidy_cert_store"`
|
||||
RevokedCerts bool `json:"tidy_revoked_certs"`
|
||||
IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"`
|
||||
SafetyBuffer time.Duration `json:"safety_buffer"`
|
||||
Enabled bool `json:"enabled"`
|
||||
Interval time.Duration `json:"interval_duration"`
|
||||
CertStore bool `json:"tidy_cert_store"`
|
||||
RevokedCerts bool `json:"tidy_revoked_certs"`
|
||||
IssuerAssocs bool `json:"tidy_revoked_cert_issuer_associations"`
|
||||
SafetyBuffer time.Duration `json:"safety_buffer"`
|
||||
PauseDuration time.Duration `json:"pause_duration"`
|
||||
}
|
||||
|
||||
var defaultTidyConfig = tidyConfig{
|
||||
Enabled: false,
|
||||
Interval: 12 * time.Hour,
|
||||
CertStore: false,
|
||||
RevokedCerts: false,
|
||||
IssuerAssocs: false,
|
||||
SafetyBuffer: 72 * time.Hour,
|
||||
Enabled: false,
|
||||
Interval: 12 * time.Hour,
|
||||
CertStore: false,
|
||||
RevokedCerts: false,
|
||||
IssuerAssocs: false,
|
||||
SafetyBuffer: 72 * time.Hour,
|
||||
PauseDuration: 0 * time.Second,
|
||||
}
|
||||
|
||||
func pathTidy(b *backend) *framework.Path {
|
||||
|
@ -49,6 +54,20 @@ func pathTidy(b *backend) *framework.Path {
|
|||
}
|
||||
}
|
||||
|
||||
func pathTidyCancel(b *backend) *framework.Path {
|
||||
return &framework.Path{
|
||||
Pattern: "tidy-cancel$",
|
||||
Operations: map[logical.Operation]framework.OperationHandler{
|
||||
logical.UpdateOperation: &framework.PathOperation{
|
||||
Callback: b.pathTidyCancelWrite,
|
||||
ForwardPerformanceStandby: true,
|
||||
},
|
||||
},
|
||||
HelpSynopsis: pathTidyCancelHelpSyn,
|
||||
HelpDescription: pathTidyCancelHelpDesc,
|
||||
}
|
||||
}
|
||||
|
||||
func pathTidyStatus(b *backend) *framework.Path {
|
||||
return &framework.Path{
|
||||
Pattern: "tidy-status$",
|
||||
|
@ -98,21 +117,36 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
|||
tidyCertStore := d.Get("tidy_cert_store").(bool)
|
||||
tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool) || d.Get("tidy_revocation_list").(bool)
|
||||
tidyRevokedAssocs := d.Get("tidy_revoked_cert_issuer_associations").(bool)
|
||||
pauseDurationStr := d.Get("pause_duration").(string)
|
||||
pauseDuration := 0 * time.Second
|
||||
|
||||
if safetyBuffer < 1 {
|
||||
return logical.ErrorResponse("safety_buffer must be greater than zero"), nil
|
||||
}
|
||||
|
||||
if pauseDurationStr != "" {
|
||||
var err error
|
||||
pauseDuration, err = time.ParseDuration(pauseDurationStr)
|
||||
if err != nil {
|
||||
return logical.ErrorResponse(fmt.Sprintf("Error parsing pause_duration: %v", err)), nil
|
||||
}
|
||||
|
||||
if pauseDuration < (0 * time.Second) {
|
||||
return logical.ErrorResponse("received invalid, negative pause_duration"), nil
|
||||
}
|
||||
}
|
||||
|
||||
bufferDuration := time.Duration(safetyBuffer) * time.Second
|
||||
|
||||
// Manual run with constructed configuration.
|
||||
config := &tidyConfig{
|
||||
Enabled: true,
|
||||
Interval: 0 * time.Second,
|
||||
CertStore: tidyCertStore,
|
||||
RevokedCerts: tidyRevokedCerts,
|
||||
IssuerAssocs: tidyRevokedAssocs,
|
||||
SafetyBuffer: bufferDuration,
|
||||
Enabled: true,
|
||||
Interval: 0 * time.Second,
|
||||
CertStore: tidyCertStore,
|
||||
RevokedCerts: tidyRevokedCerts,
|
||||
IssuerAssocs: tidyRevokedAssocs,
|
||||
SafetyBuffer: bufferDuration,
|
||||
PauseDuration: pauseDuration,
|
||||
}
|
||||
|
||||
if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) {
|
||||
|
@ -148,6 +182,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
|||
|
||||
func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) {
|
||||
go func() {
|
||||
atomic.StoreUint32(b.tidyCancelCAS, 0)
|
||||
defer atomic.StoreUint32(b.tidyCASGuard, 0)
|
||||
|
||||
b.tidyStatusStart(config)
|
||||
|
@ -164,9 +199,14 @@ func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) {
|
|||
}
|
||||
}
|
||||
|
||||
// Check for cancel before continuing.
|
||||
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
|
||||
return tidyCancelledError
|
||||
}
|
||||
|
||||
if config.RevokedCerts || config.IssuerAssocs {
|
||||
if err := b.doTidyRevocationStore(ctx, req, logger, config); err != nil {
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -201,6 +241,16 @@ func (b *backend) doTidyCertStore(ctx context.Context, req *logical.Request, log
|
|||
b.tidyStatusMessage(fmt.Sprintf("Tidying certificate store: checking entry %d of %d", i, serialCount))
|
||||
metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_current_entry"}, float32(i))
|
||||
|
||||
// Check for cancel before continuing.
|
||||
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
|
||||
return tidyCancelledError
|
||||
}
|
||||
|
||||
// Check for pause duration to reduce resource consumption.
|
||||
if config.PauseDuration > (0 * time.Second) {
|
||||
time.Sleep(config.PauseDuration)
|
||||
}
|
||||
|
||||
certEntry, err := req.Storage.Get(ctx, "certs/"+serial)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error fetching certificate %q: %w", serial, err)
|
||||
|
@ -272,6 +322,18 @@ func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Reques
|
|||
b.tidyStatusMessage(fmt.Sprintf("Tidying revoked certificates: checking certificate %d of %d", i, len(revokedSerials)))
|
||||
metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_current_entry"}, float32(i))
|
||||
|
||||
// Check for cancel before continuing.
|
||||
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 1, 0) {
|
||||
return tidyCancelledError
|
||||
}
|
||||
|
||||
// Check for pause duration to reduce resource consumption.
|
||||
if config.PauseDuration > (0 * time.Second) {
|
||||
b.revokeStorageLock.Unlock()
|
||||
time.Sleep(config.PauseDuration)
|
||||
b.revokeStorageLock.Lock()
|
||||
}
|
||||
|
||||
revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err)
|
||||
|
@ -379,6 +441,33 @@ func (b *backend) doTidyRevocationStore(ctx context.Context, req *logical.Reques
|
|||
return nil
|
||||
}
|
||||
|
||||
func (b *backend) pathTidyCancelWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
|
||||
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary) && !b.System().LocalMount() {
|
||||
return nil, logical.ErrReadOnly
|
||||
}
|
||||
|
||||
if atomic.LoadUint32(b.tidyCASGuard) == 0 {
|
||||
resp := &logical.Response{}
|
||||
resp.AddWarning("Tidy operation cannot be cancelled as none is currently running.")
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Grab the status lock before writing the cancel atomic. This lets us
|
||||
// update the status correctly as well, avoiding writing it if we're not
|
||||
// presently running.
|
||||
//
|
||||
// Unlock needs to occur prior to calling read.
|
||||
b.tidyStatusLock.Lock()
|
||||
if b.tidyStatus.state == tidyStatusStarted || atomic.LoadUint32(b.tidyCASGuard) == 1 {
|
||||
if atomic.CompareAndSwapUint32(b.tidyCancelCAS, 0, 1) {
|
||||
b.tidyStatus.state = tidyStatusCancelling
|
||||
}
|
||||
}
|
||||
b.tidyStatusLock.Unlock()
|
||||
|
||||
return b.pathTidyStatusRead(ctx, req, d)
|
||||
}
|
||||
|
||||
func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *framework.FieldData) (*logical.Response, error) {
|
||||
// If this node is a performance secondary return an ErrReadOnly so that the request gets forwarded,
|
||||
// but only if the PKI backend is not a local mount.
|
||||
|
@ -391,17 +480,19 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
|
|||
|
||||
resp := &logical.Response{
|
||||
Data: map[string]interface{}{
|
||||
"safety_buffer": nil,
|
||||
"tidy_cert_store": nil,
|
||||
"tidy_revoked_certs": nil,
|
||||
"state": "Inactive",
|
||||
"error": nil,
|
||||
"time_started": nil,
|
||||
"time_finished": nil,
|
||||
"message": nil,
|
||||
"cert_store_deleted_count": nil,
|
||||
"revoked_cert_deleted_count": nil,
|
||||
"missing_issuer_cert_count": nil,
|
||||
"safety_buffer": nil,
|
||||
"tidy_cert_store": nil,
|
||||
"tidy_revoked_certs": nil,
|
||||
"tidy_revoked_cert_issuer_associations": nil,
|
||||
"pause_duration": nil,
|
||||
"state": "Inactive",
|
||||
"error": nil,
|
||||
"time_started": nil,
|
||||
"time_finished": nil,
|
||||
"message": nil,
|
||||
"cert_store_deleted_count": nil,
|
||||
"revoked_cert_deleted_count": nil,
|
||||
"missing_issuer_cert_count": nil,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -413,6 +504,7 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
|
|||
resp.Data["tidy_cert_store"] = b.tidyStatus.tidyCertStore
|
||||
resp.Data["tidy_revoked_certs"] = b.tidyStatus.tidyRevokedCerts
|
||||
resp.Data["tidy_revoked_cert_issuer_associations"] = b.tidyStatus.tidyRevokedAssocs
|
||||
resp.Data["pause_duration"] = b.tidyStatus.pauseDuration
|
||||
resp.Data["time_started"] = b.tidyStatus.timeStarted
|
||||
resp.Data["message"] = b.tidyStatus.message
|
||||
resp.Data["cert_store_deleted_count"] = b.tidyStatus.certStoreDeletedCount
|
||||
|
@ -432,6 +524,11 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
|
|||
resp.Data["error"] = b.tidyStatus.err.Error()
|
||||
// Don't clear the message so that it serves as a hint about when
|
||||
// the error occurred.
|
||||
case tidyStatusCancelling:
|
||||
resp.Data["state"] = "Cancelling"
|
||||
case tidyStatusCancelled:
|
||||
resp.Data["state"] = "Cancelled"
|
||||
resp.Data["time_finished"] = b.tidyStatus.timeFinished
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
@ -452,6 +549,7 @@ func (b *backend) pathConfigAutoTidyRead(ctx context.Context, req *logical.Reque
|
|||
"tidy_revoked_certs": config.RevokedCerts,
|
||||
"tidy_revoked_cert_issuer_associations": config.IssuerAssocs,
|
||||
"safety_buffer": int(config.SafetyBuffer / time.Second),
|
||||
"pause_duration": config.PauseDuration.String(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
@ -493,6 +591,17 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
|
|||
}
|
||||
}
|
||||
|
||||
if pauseDurationRaw, ok := d.GetOk("pause_duration"); ok {
|
||||
config.PauseDuration, err = time.ParseDuration(pauseDurationRaw.(string))
|
||||
if err != nil {
|
||||
return logical.ErrorResponse(fmt.Sprintf("unable to parse given pause_duration: %v", err)), nil
|
||||
}
|
||||
|
||||
if config.PauseDuration < (0 * time.Second) {
|
||||
return logical.ErrorResponse("received invalid, negative pause_duration"), nil
|
||||
}
|
||||
}
|
||||
|
||||
if config.Enabled && !(config.CertStore || config.RevokedCerts || config.IssuerAssocs) {
|
||||
return logical.ErrorResponse(fmt.Sprintf("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations).")), nil
|
||||
}
|
||||
|
@ -509,8 +618,10 @@ func (b *backend) tidyStatusStart(config *tidyConfig) {
|
|||
tidyCertStore: config.CertStore,
|
||||
tidyRevokedCerts: config.RevokedCerts,
|
||||
tidyRevokedAssocs: config.IssuerAssocs,
|
||||
state: tidyStatusStarted,
|
||||
timeStarted: time.Now(),
|
||||
pauseDuration: config.PauseDuration.String(),
|
||||
|
||||
state: tidyStatusStarted,
|
||||
timeStarted: time.Now(),
|
||||
}
|
||||
|
||||
metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, float32(b.tidyStatus.timeStarted.Unix()))
|
||||
|
@ -524,6 +635,8 @@ func (b *backend) tidyStatusStop(err error) {
|
|||
b.tidyStatus.err = err
|
||||
if err == nil {
|
||||
b.tidyStatus.state = tidyStatusFinished
|
||||
} else if err == tidyCancelledError {
|
||||
b.tidyStatus.state = tidyStatusCancelled
|
||||
} else {
|
||||
b.tidyStatus.state = tidyStatusError
|
||||
}
|
||||
|
@ -595,6 +708,18 @@ current time, minus the value of 'safety_buffer', is greater than the
|
|||
expiration, it will be removed.
|
||||
`
|
||||
|
||||
const pathTidyCancelHelpSyn = `
|
||||
Cancels a currently running tidy operation.
|
||||
`
|
||||
|
||||
const pathTidyCancelHelpDesc = `
|
||||
This endpoint allows cancelling a currently running tidy operation.
|
||||
|
||||
Periodically throughout the invocation of tidy, we'll check if the operation
|
||||
has been requested to be cancelled. If so, we'll stop the currently running
|
||||
tidy operation.
|
||||
`
|
||||
|
||||
const pathTidyStatusHelpSyn = `
|
||||
Returns the status of the tidy operation.
|
||||
`
|
||||
|
@ -619,6 +744,16 @@ The result includes the following fields:
|
|||
* 'missing_issuer_cert_count': The number of revoked certificates which were missing a valid issuer reference
|
||||
`
|
||||
|
||||
const pathConfigAutoTidySyn = ``
|
||||
const pathConfigAutoTidySyn = `
|
||||
Modifies the current configuration for automatic tidy execution.
|
||||
`
|
||||
|
||||
const pathConfigAutoTidyDesc = ``
|
||||
const pathConfigAutoTidyDesc = `
|
||||
This endpoint accepts parameters to a tidy operation (see /tidy) that
|
||||
will be used for automatic tidy execution. This takes two extra parameters,
|
||||
enabled (to enable or disable auto-tidy) and interval_duration (which
|
||||
controls the frequency of auto-tidy execution).
|
||||
|
||||
Once enabled, a tidy operation will be kicked off automatically, as if it
|
||||
were executed with the posted configuration.
|
||||
`
|
||||
|
|
|
@ -155,3 +155,81 @@ func TestAutoTidy(t *testing.T) {
|
|||
require.Nil(t, err)
|
||||
require.Nil(t, resp)
|
||||
}
|
||||
|
||||
func TestTidyCancellation(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
numLeaves := 100
|
||||
|
||||
b, s := createBackendWithStorage(t)
|
||||
|
||||
// Create a root, a role, and a bunch of leaves.
|
||||
_, err := CBWrite(b, s, "root/generate/internal", map[string]interface{}{
|
||||
"common_name": "root example.com",
|
||||
"issuer_name": "root",
|
||||
"ttl": "20m",
|
||||
"key_type": "ec",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
_, err = CBWrite(b, s, "roles/local-testing", map[string]interface{}{
|
||||
"allow_any_name": true,
|
||||
"enforce_hostnames": false,
|
||||
"key_type": "ec",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
for i := 0; i < numLeaves; i++ {
|
||||
_, err = CBWrite(b, s, "issue/local-testing", map[string]interface{}{
|
||||
"common_name": "testing",
|
||||
"ttl": "1s",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Kick off a tidy operation (which runs in the background), but with
|
||||
// a slow-ish pause between certificates.
|
||||
_, err = CBWrite(b, s, "tidy", map[string]interface{}{
|
||||
"tidy_cert_store": true,
|
||||
"safety_buffer": "1s",
|
||||
"pause_duration": "1s",
|
||||
})
|
||||
|
||||
// If we wait six seconds, the operation should still be running. That's
|
||||
// how we check that pause_duration works.
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
resp, err := CBRead(b, s, "tidy-status")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp)
|
||||
require.NotNil(t, resp.Data)
|
||||
require.Equal(t, resp.Data["state"], "Running")
|
||||
|
||||
// If we now cancel the operation, the response should say Cancelling.
|
||||
cancelResp, err := CBWrite(b, s, "tidy-cancel", map[string]interface{}{})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, cancelResp)
|
||||
require.NotNil(t, cancelResp.Data)
|
||||
state := cancelResp.Data["state"].(string)
|
||||
howMany := cancelResp.Data["cert_store_deleted_count"].(uint)
|
||||
|
||||
if state == "Cancelled" {
|
||||
// Rest of the test can't run; log and exit.
|
||||
t.Log("Went to cancel the operation but response was already cancelled")
|
||||
return
|
||||
}
|
||||
|
||||
require.Equal(t, state, "Cancelling")
|
||||
|
||||
// Wait a little longer, and ensure we only processed at most 2 more certs
|
||||
// after the cancellation respon.
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
statusResp, err := CBRead(b, s, "tidy-status")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, statusResp)
|
||||
require.NotNil(t, statusResp.Data)
|
||||
require.Equal(t, statusResp.Data["state"], "Cancelled")
|
||||
nowMany := statusResp.Data["cert_store_deleted_count"].(uint)
|
||||
if howMany+3 <= nowMany {
|
||||
t.Fatalf("expected to only process at most 3 more certificates, but processed (%v >>> %v) certs", nowMany, howMany)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
secrets/pki: Add ability to cancel tidy operations, control tidy resource usage.
|
||||
```
|
|
@ -70,6 +70,7 @@ update your API calls accordingly.
|
|||
- [Tidy](#tidy)
|
||||
- [Configure Automatic Tidy](#configure-automatic-tidy)
|
||||
- [Tidy Status](#tidy-status)
|
||||
- [Cancel Tidy](#cancel-tidy)
|
||||
- [Cluster Scalability](#cluster-scalability)
|
||||
- [Managed Key](#managed-keys) (Enterprise Only)
|
||||
- [Vault CLI with DER/PEM responses](#vault-cli-with-der-pem-responses)
|
||||
|
@ -3197,6 +3198,18 @@ expiration time.
|
|||
the time must be after the expiration time of the certificate (according to
|
||||
the local clock) plus the duration of `safety_buffer`. Defaults to `72h`.
|
||||
|
||||
- `pause_duration` `(string: "0s")` - Specifies the duration to pause
|
||||
between tidying individual certificates. This releases the revocation
|
||||
lock and allows other operations to continue while tidy is running.
|
||||
This allows an operator to control tidy's resource utilization within
|
||||
a timespan: the LIST operation will remain in memory, but the space
|
||||
between reading, parsing, and updates on-disk cert entries will be
|
||||
increased, decreasing resource utilization.
|
||||
|
||||
~> Note: Using too long of a `pause_duration` can result in tidy operations
|
||||
not concluding during this lifetime! Using too short of a pause duration
|
||||
(but non-zero) can lead to lock contention. Use [tidy's cancellation](#cancel-tidy)
|
||||
to stop a running operation after the sleep period is over.
|
||||
|
||||
#### Sample Payload
|
||||
|
||||
|
@ -3325,6 +3338,46 @@ $ curl \
|
|||
},
|
||||
```
|
||||
|
||||
### Cancel Tidy
|
||||
|
||||
This endpoint allows cancelling a running tidy operation. It takes no
|
||||
parameter and cancels the tidy at the next available checkpoint, which
|
||||
may process additional certificates between when the operation was
|
||||
marked as cancelled and when the operation stopped.
|
||||
|
||||
The response to this endpoint is the same as the [status](#tidy-status).
|
||||
|
||||
| Method | Path |
|
||||
| :----- | :----------------- |
|
||||
| `POST` | `/pki/tidy-cancel` |
|
||||
|
||||
#### Sample Request
|
||||
|
||||
```shell-session
|
||||
$ curl \
|
||||
--header "X-Vault-Token: ..." \
|
||||
--request POST \
|
||||
http://127.0.0.1:8200/v1/pki/tidy-cancel
|
||||
|
||||
```
|
||||
|
||||
#### Sample Response
|
||||
|
||||
```json
|
||||
"data": {
|
||||
"safety_buffer": 60,
|
||||
"tidy_cert_store": true,
|
||||
"tidy_revoked_certs": true,
|
||||
"error": null,
|
||||
"message": "Tidying certificate store: checking entry 234 of 488",
|
||||
"revoked_cert_deleted_count": 0,
|
||||
"cert_store_deleted_count": 2,
|
||||
"state": "Cancelling",
|
||||
"time_started": "2021-10-20T14:52:13.510161-04:00",
|
||||
"time_finished": null
|
||||
},
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cluster Scalability
|
||||
|
|
Loading…
Reference in New Issue