VAULT-444: Add PKI tidy-status endpoint. (#12885)
VAULT-444: Add PKI tidy-status endpoint. Add metrics so that the PKI tidy status can be monitored using telemetry as well. Co-authored-by: Steven Clark <steven.clark@hashicorp.com>
This commit is contained in:
parent
d37da52974
commit
f6e35369f0
|
@ -3,11 +3,11 @@ package pki
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/vault/sdk/framework"
|
"github.com/hashicorp/vault/sdk/framework"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
|
"github.com/hashicorp/vault/vault"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Factory creates a new backend implementing the logical.Backend interface
|
// Factory creates a new backend implementing the logical.Backend interface
|
||||||
|
@ -75,6 +75,7 @@ func Backend(conf *logical.BackendConfig) *backend {
|
||||||
pathFetchListCerts(&b),
|
pathFetchListCerts(&b),
|
||||||
pathRevoke(&b),
|
pathRevoke(&b),
|
||||||
pathTidy(&b),
|
pathTidy(&b),
|
||||||
|
pathTidyStatus(&b),
|
||||||
},
|
},
|
||||||
|
|
||||||
Secrets: []*framework.Secret{
|
Secrets: []*framework.Secret{
|
||||||
|
@ -86,6 +87,7 @@ func Backend(conf *logical.BackendConfig) *backend {
|
||||||
|
|
||||||
b.crlLifetime = time.Hour * 72
|
b.crlLifetime = time.Hour * 72
|
||||||
b.tidyCASGuard = new(uint32)
|
b.tidyCASGuard = new(uint32)
|
||||||
|
b.tidyStatus = &tidyStatus{state: tidyStatusInactive}
|
||||||
b.storage = conf.StorageView
|
b.storage = conf.StorageView
|
||||||
|
|
||||||
return &b
|
return &b
|
||||||
|
@ -96,8 +98,36 @@ type backend struct {
|
||||||
|
|
||||||
storage logical.Storage
|
storage logical.Storage
|
||||||
crlLifetime time.Duration
|
crlLifetime time.Duration
|
||||||
revokeStorageLock sync.RWMutex
|
revokeStorageLock vault.DeadlockRWMutex
|
||||||
tidyCASGuard *uint32
|
tidyCASGuard *uint32
|
||||||
|
|
||||||
|
tidyStatusLock vault.DeadlockRWMutex
|
||||||
|
tidyStatus *tidyStatus
|
||||||
|
}
|
||||||
|
|
||||||
|
// tidyStatusState enumerates the lifecycle phases of a tidy operation as
// tracked in tidyStatus.state.
type tidyStatusState int

const (
	// tidyStatusInactive is the zero value; it is the state the backend is
	// constructed with.
	tidyStatusInactive tidyStatusState = iota
	// tidyStatusStarted is set by tidyStatusStart when a tidy run begins.
	tidyStatusStarted
	// tidyStatusFinished is set by tidyStatusStop when it is given a nil error.
	tidyStatusFinished
	// tidyStatusError is set by tidyStatusStop when it is given a non-nil error.
	tidyStatusError
)
|
||||||
|
|
||||||
|
type tidyStatus struct {
|
||||||
|
// Parameters used to initiate the operation
|
||||||
|
safetyBuffer int
|
||||||
|
tidyCertStore bool
|
||||||
|
tidyRevokedCerts bool
|
||||||
|
|
||||||
|
// Status
|
||||||
|
state tidyStatusState
|
||||||
|
err error
|
||||||
|
timeStarted time.Time
|
||||||
|
timeFinished time.Time
|
||||||
|
message string
|
||||||
|
certStoreDeletedCount uint
|
||||||
|
revokedCertDeletedCount uint
|
||||||
}
|
}
|
||||||
|
|
||||||
const backendHelp = `
|
const backendHelp = `
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"crypto/x509/pkix"
|
"crypto/x509/pkix"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
@ -29,6 +30,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics"
|
||||||
"github.com/fatih/structs"
|
"github.com/fatih/structs"
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
"github.com/hashicorp/go-secure-stdlib/strutil"
|
"github.com/hashicorp/go-secure-stdlib/strutil"
|
||||||
|
@ -3092,6 +3094,22 @@ func setCerts() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
||||||
|
// Use a ridiculously long time to minimize the chance
|
||||||
|
// that we have to deal with more than one interval.
|
||||||
|
// InMemSink rounds down to an interval boundary rather than
|
||||||
|
// starting one at the time of initialization.
|
||||||
|
inmemSink := metrics.NewInmemSink(
|
||||||
|
1000000*time.Hour,
|
||||||
|
2000000*time.Hour)
|
||||||
|
|
||||||
|
metricsConf := metrics.DefaultConfig("")
|
||||||
|
metricsConf.EnableHostname = false
|
||||||
|
metricsConf.EnableHostnameLabel = false
|
||||||
|
metricsConf.EnableServiceLabel = false
|
||||||
|
metricsConf.EnableTypePrefix = false
|
||||||
|
|
||||||
|
metrics.NewGlobal(metricsConf, inmemSink)
|
||||||
|
|
||||||
// Enable PKI secret engine
|
// Enable PKI secret engine
|
||||||
coreConfig := &vault.CoreConfig{
|
coreConfig := &vault.CoreConfig{
|
||||||
LogicalBackends: map[string]logical.Factory{
|
LogicalBackends: map[string]logical.Factory{
|
||||||
|
@ -3243,6 +3261,91 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
||||||
// Sleep a bit to make sure we're past the safety buffer
|
// Sleep a bit to make sure we're past the safety buffer
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(2 * time.Second)
|
||||||
|
|
||||||
|
// Issue a tidy-status on /pki
|
||||||
|
{
|
||||||
|
tidyStatus, err := client.Logical().Read("pki/tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedData := map[string]interface{}{
|
||||||
|
"safety_buffer": json.Number("1"),
|
||||||
|
"tidy_cert_store": true,
|
||||||
|
"tidy_revoked_certs": true,
|
||||||
|
"state": "Finished",
|
||||||
|
"error": nil,
|
||||||
|
"time_started": nil,
|
||||||
|
"time_finished": nil,
|
||||||
|
"message": nil,
|
||||||
|
"cert_store_deleted_count": json.Number("1"),
|
||||||
|
"revoked_cert_deleted_count": json.Number("1"),
|
||||||
|
}
|
||||||
|
// Let's copy the times from the response so that we can use deep.Equal()
|
||||||
|
timeStarted, ok := tidyStatus.Data["time_started"]
|
||||||
|
if !ok || timeStarted == "" {
|
||||||
|
t.Fatal("Expected tidy status response to include a value for time_started")
|
||||||
|
}
|
||||||
|
expectedData["time_started"] = timeStarted
|
||||||
|
timeFinished, ok := tidyStatus.Data["time_finished"]
|
||||||
|
if !ok || timeFinished == "" {
|
||||||
|
t.Fatal("Expected tidy status response to include a value for time_finished")
|
||||||
|
}
|
||||||
|
expectedData["time_finished"] = timeFinished
|
||||||
|
|
||||||
|
if diff := deep.Equal(expectedData, tidyStatus.Data); diff != nil {
|
||||||
|
t.Fatal(diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check the tidy metrics
|
||||||
|
{
|
||||||
|
// Map of gauges to expected value
|
||||||
|
expectedGauges := map[string]float32{
|
||||||
|
"secrets.pki.tidy.cert_store_current_entry": 0,
|
||||||
|
"secrets.pki.tidy.cert_store_total_entries": 1,
|
||||||
|
"secrets.pki.tidy.revoked_cert_current_entry": 0,
|
||||||
|
"secrets.pki.tidy.revoked_cert_total_entries": 1,
|
||||||
|
"secrets.pki.tidy.start_time_epoch": 0,
|
||||||
|
}
|
||||||
|
// Map of counters to the sum of the metrics for that counter
|
||||||
|
expectedCounters := map[string]float64{
|
||||||
|
"secrets.pki.tidy.cert_store_deleted_count": 1,
|
||||||
|
"secrets.pki.tidy.revoked_cert_deleted_count": 1,
|
||||||
|
"secrets.pki.tidy.success": 2,
|
||||||
|
// Note that "secrets.pki.tidy.failure" won't be in the captured metrics
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the metrics span more than one interval, skip the checks
|
||||||
|
intervals := inmemSink.Data()
|
||||||
|
if len(intervals) == 1 {
|
||||||
|
interval := inmemSink.Data()[0]
|
||||||
|
|
||||||
|
for gauge, value := range expectedGauges {
|
||||||
|
if _, ok := interval.Gauges[gauge]; !ok {
|
||||||
|
t.Fatalf("Expected metrics to include a value for gauge %s", gauge)
|
||||||
|
}
|
||||||
|
if value != interval.Gauges[gauge].Value {
|
||||||
|
t.Fatalf("Expected value metric %s to be %f but got %f", gauge, value, interval.Gauges[gauge].Value)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
for counter, value := range expectedCounters {
|
||||||
|
if _, ok := interval.Counters[counter]; !ok {
|
||||||
|
t.Fatalf("Expected metrics to include a value for couter %s", counter)
|
||||||
|
}
|
||||||
|
if value != interval.Counters[counter].Sum {
|
||||||
|
t.Fatalf("Expected the sum of metric %s to be %f but got %f", counter, value, interval.Counters[counter].Sum)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tidyDuration, ok := interval.Samples["secrets.pki.tidy.duration"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("Expected metrics to include a value for sample secrets.pki.tidy.duration")
|
||||||
|
}
|
||||||
|
if tidyDuration.Count <= 0 {
|
||||||
|
t.Fatalf("Expected metrics to have count > 0 for sample secrets.pki.tidy.duration, but got %d", tidyDuration.Count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
req = client.NewRequest("GET", "/v1/pki/crl")
|
req = client.NewRequest("GET", "/v1/pki/crl")
|
||||||
resp, err = client.RawRequest(req)
|
resp, err = client.RawRequest(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -558,6 +558,32 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
verifyTidyStatus := func(expectedCertStoreDeleteCount int, expectedRevokedCertDeletedCount int) {
|
||||||
|
tidyStatus, err := client.Logical().Read(rootName+"tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tidyStatus.Data["state"] != "Finished" {
|
||||||
|
t.Fatalf("Expected tidy operation to be finished, but tidy-status reports its state is %v", tidyStatus.Data)
|
||||||
|
}
|
||||||
|
|
||||||
|
var count int64
|
||||||
|
if count, err = tidyStatus.Data["cert_store_deleted_count"].(json.Number).Int64(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if int64(expectedCertStoreDeleteCount) != count {
|
||||||
|
t.Fatalf("Expected %d for cert_store_deleted_count, but got %d", expectedCertStoreDeleteCount, count)
|
||||||
|
}
|
||||||
|
|
||||||
|
if count, err = tidyStatus.Data["revoked_cert_deleted_count"].(json.Number).Int64(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if int64(expectedRevokedCertDeletedCount) != count {
|
||||||
|
t.Fatalf("Expected %d for revoked_cert_deleted_count, but got %d", expectedRevokedCertDeletedCount, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Validate current state of revoked certificates
|
// Validate current state of revoked certificates
|
||||||
verifyRevocation(t, intSerialNumber, true)
|
verifyRevocation(t, intSerialNumber, true)
|
||||||
|
|
||||||
|
@ -585,6 +611,8 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName,
|
||||||
|
|
||||||
// Check to make sure we still find the cert and see it on the CRL
|
// Check to make sure we still find the cert and see it on the CRL
|
||||||
verifyRevocation(t, intSerialNumber, true)
|
verifyRevocation(t, intSerialNumber, true)
|
||||||
|
|
||||||
|
verifyTidyStatus(0, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run with both values set false, nothing should happen
|
// Run with both values set false, nothing should happen
|
||||||
|
@ -606,6 +634,8 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName,
|
||||||
|
|
||||||
// Check to make sure we still find the cert and see it on the CRL
|
// Check to make sure we still find the cert and see it on the CRL
|
||||||
verifyRevocation(t, intSerialNumber, true)
|
verifyRevocation(t, intSerialNumber, true)
|
||||||
|
|
||||||
|
verifyTidyStatus(0, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run with a short safety buffer and both set to true, both should be cleared
|
// Run with a short safety buffer and both set to true, both should be cleared
|
||||||
|
@ -627,6 +657,9 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName,
|
||||||
|
|
||||||
// Check to make sure we still find the cert and see it on the CRL
|
// Check to make sure we still find the cert and see it on the CRL
|
||||||
verifyRevocation(t, intSerialNumber, false)
|
verifyRevocation(t, intSerialNumber, false)
|
||||||
|
|
||||||
|
verifyTidyStatus(1, 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@ import (
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics"
|
||||||
"github.com/hashicorp/vault/sdk/framework"
|
"github.com/hashicorp/vault/sdk/framework"
|
||||||
"github.com/hashicorp/vault/sdk/helper/consts"
|
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
|
@ -15,7 +16,7 @@ import (
|
||||||
|
|
||||||
func pathTidy(b *backend) *framework.Path {
|
func pathTidy(b *backend) *framework.Path {
|
||||||
return &framework.Path{
|
return &framework.Path{
|
||||||
Pattern: "tidy",
|
Pattern: "tidy$",
|
||||||
Fields: map[string]*framework.FieldSchema{
|
Fields: map[string]*framework.FieldSchema{
|
||||||
"tidy_cert_store": {
|
"tidy_cert_store": {
|
||||||
Type: framework.TypeBool,
|
Type: framework.TypeBool,
|
||||||
|
@ -45,8 +46,11 @@ Defaults to 72 hours.`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
Callbacks: map[logical.Operation]framework.OperationFunc{
|
Operations: map[logical.Operation]framework.OperationHandler{
|
||||||
logical.UpdateOperation: b.pathTidyWrite,
|
logical.UpdateOperation: &framework.PathOperation{
|
||||||
|
Callback: b.pathTidyWrite,
|
||||||
|
ForwardPerformanceStandby: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
HelpSynopsis: pathTidyHelpSyn,
|
HelpSynopsis: pathTidyHelpSyn,
|
||||||
|
@ -54,12 +58,21 @@ Defaults to 72 hours.`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
|
func pathTidyStatus(b *backend) *framework.Path {
|
||||||
// If we are a performance standby forward the request to the active node
|
return &framework.Path{
|
||||||
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) {
|
Pattern: "tidy-status$",
|
||||||
return nil, logical.ErrReadOnly
|
Operations: map[logical.Operation]framework.OperationHandler{
|
||||||
|
logical.ReadOperation: &framework.PathOperation{
|
||||||
|
Callback: b.pathTidyStatusRead,
|
||||||
|
ForwardPerformanceStandby: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
HelpSynopsis: pathTidyStatusHelpSyn,
|
||||||
|
HelpDescription: pathTidyStatusHelpDesc,
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
|
||||||
safetyBuffer := d.Get("safety_buffer").(int)
|
safetyBuffer := d.Get("safety_buffer").(int)
|
||||||
tidyCertStore := d.Get("tidy_cert_store").(bool)
|
tidyCertStore := d.Get("tidy_cert_store").(bool)
|
||||||
tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool)
|
tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool)
|
||||||
|
@ -86,6 +99,8 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
go func() {
|
go func() {
|
||||||
defer atomic.StoreUint32(b.tidyCASGuard, 0)
|
defer atomic.StoreUint32(b.tidyCASGuard, 0)
|
||||||
|
|
||||||
|
b.tidyStatusStart(safetyBuffer, tidyCertStore, tidyRevokedCerts || tidyRevocationList)
|
||||||
|
|
||||||
// Don't cancel when the original client request goes away
|
// Don't cancel when the original client request goes away
|
||||||
ctx = context.Background()
|
ctx = context.Background()
|
||||||
|
|
||||||
|
@ -98,7 +113,12 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
return fmt.Errorf("error fetching list of certs: %w", err)
|
return fmt.Errorf("error fetching list of certs: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, serial := range serials {
|
serialCount := len(serials)
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries"}, float32(serialCount))
|
||||||
|
for i, serial := range serials {
|
||||||
|
b.tidyStatusMessage(fmt.Sprintf("Tidying certificate store: checking entry %d of %d", i, serialCount))
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_current_entry"}, float32(i))
|
||||||
|
|
||||||
certEntry, err := req.Storage.Get(ctx, "certs/"+serial)
|
certEntry, err := req.Storage.Get(ctx, "certs/"+serial)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error fetching certificate %q: %w", serial, err)
|
return fmt.Errorf("error fetching certificate %q: %w", serial, err)
|
||||||
|
@ -109,6 +129,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
||||||
return fmt.Errorf("error deleting nil entry with serial %s: %w", serial, err)
|
return fmt.Errorf("error deleting nil entry with serial %s: %w", serial, err)
|
||||||
}
|
}
|
||||||
|
b.tidyStatusIncCertStoreCount()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,6 +138,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
||||||
return fmt.Errorf("error deleting entry with nil value with serial %s: %w", serial, err)
|
return fmt.Errorf("error deleting entry with nil value with serial %s: %w", serial, err)
|
||||||
}
|
}
|
||||||
|
b.tidyStatusIncCertStoreCount()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,6 +151,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil {
|
||||||
return fmt.Errorf("error deleting serial %q from storage: %w", serial, err)
|
return fmt.Errorf("error deleting serial %q from storage: %w", serial, err)
|
||||||
}
|
}
|
||||||
|
b.tidyStatusIncCertStoreCount()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,8 +167,14 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
return fmt.Errorf("error fetching list of revoked certs: %w", err)
|
return fmt.Errorf("error fetching list of revoked certs: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
revokedSerialsCount := len(revokedSerials)
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries"}, float32(revokedSerialsCount))
|
||||||
|
|
||||||
var revInfo revocationInfo
|
var revInfo revocationInfo
|
||||||
for _, serial := range revokedSerials {
|
for i, serial := range revokedSerials {
|
||||||
|
b.tidyStatusMessage(fmt.Sprintf("Tidying revoked certificates: checking certificate %d of %d", i, len(revokedSerials)))
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_current_entry"}, float32(i))
|
||||||
|
|
||||||
revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial)
|
revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err)
|
return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err)
|
||||||
|
@ -156,6 +185,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil {
|
if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil {
|
||||||
return fmt.Errorf("error deleting nil revoked entry with serial %s: %w", serial, err)
|
return fmt.Errorf("error deleting nil revoked entry with serial %s: %w", serial, err)
|
||||||
}
|
}
|
||||||
|
b.tidyStatusIncRevokedCertCount()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,6 +194,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil {
|
if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil {
|
||||||
return fmt.Errorf("error deleting revoked entry with nil value with serial %s: %w", serial, err)
|
return fmt.Errorf("error deleting revoked entry with nil value with serial %s: %w", serial, err)
|
||||||
}
|
}
|
||||||
|
b.tidyStatusIncRevokedCertCount()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,6 +220,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
return fmt.Errorf("error deleting serial %q from store when tidying revoked: %w", serial, err)
|
return fmt.Errorf("error deleting serial %q from store when tidying revoked: %w", serial, err)
|
||||||
}
|
}
|
||||||
rebuildCRL = true
|
rebuildCRL = true
|
||||||
|
b.tidyStatusIncRevokedCertCount()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,7 +236,9 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
|
|
||||||
if err := doTidy(); err != nil {
|
if err := doTidy(); err != nil {
|
||||||
logger.Error("error running tidy", "error", err)
|
logger.Error("error running tidy", "error", err)
|
||||||
return
|
b.tidyStatusStop(err)
|
||||||
|
} else {
|
||||||
|
b.tidyStatusStop(nil)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -213,6 +247,121 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
|
||||||
return logical.RespondWithStatusCode(resp, req, http.StatusAccepted)
|
return logical.RespondWithStatusCode(resp, req, http.StatusAccepted)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *backend) pathTidyStatusRead(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
|
||||||
|
// If this node is a performance secondary return an ErrReadOnly so that the request gets forwarded,
|
||||||
|
// but only if the PKI backend is not a local mount.
|
||||||
|
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary) && !b.System().LocalMount() {
|
||||||
|
return nil, logical.ErrReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
b.tidyStatusLock.RLock()
|
||||||
|
defer b.tidyStatusLock.RUnlock()
|
||||||
|
|
||||||
|
resp := &logical.Response{
|
||||||
|
Data: map[string]interface{}{
|
||||||
|
"safety_buffer": nil,
|
||||||
|
"tidy_cert_store": nil,
|
||||||
|
"tidy_revoked_certs": nil,
|
||||||
|
"state": "Inactive",
|
||||||
|
"error": nil,
|
||||||
|
"time_started": nil,
|
||||||
|
"time_finished": nil,
|
||||||
|
"message": nil,
|
||||||
|
"cert_store_deleted_count": nil,
|
||||||
|
"revoked_cert_deleted_count": nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.tidyStatus.state == tidyStatusInactive {
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Data["safety_buffer"] = b.tidyStatus.safetyBuffer
|
||||||
|
resp.Data["tidy_cert_store"] = b.tidyStatus.tidyCertStore
|
||||||
|
resp.Data["tidy_revoked_certs"] = b.tidyStatus.tidyRevokedCerts
|
||||||
|
resp.Data["time_started"] = b.tidyStatus.timeStarted
|
||||||
|
resp.Data["message"] = b.tidyStatus.message
|
||||||
|
resp.Data["cert_store_deleted_count"] = b.tidyStatus.certStoreDeletedCount
|
||||||
|
resp.Data["revoked_cert_deleted_count"] = b.tidyStatus.revokedCertDeletedCount
|
||||||
|
|
||||||
|
switch(b.tidyStatus.state) {
|
||||||
|
case tidyStatusStarted:
|
||||||
|
resp.Data["state"] = "Running"
|
||||||
|
case tidyStatusFinished:
|
||||||
|
resp.Data["state"] = "Finished"
|
||||||
|
resp.Data["time_finished"] = b.tidyStatus.timeFinished
|
||||||
|
resp.Data["message"] = nil
|
||||||
|
case tidyStatusError:
|
||||||
|
resp.Data["state"] = "Error"
|
||||||
|
resp.Data["time_finished"] = b.tidyStatus.timeFinished
|
||||||
|
resp.Data["error"] = b.tidyStatus.err.Error()
|
||||||
|
// Don't clear the message so that it serves as a hint about when
|
||||||
|
// the error ocurred.
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) tidyStatusStart(safetyBuffer int, tidyCertStore, tidyRevokedCerts bool) {
|
||||||
|
b.tidyStatusLock.Lock()
|
||||||
|
defer b.tidyStatusLock.Unlock()
|
||||||
|
|
||||||
|
b.tidyStatus = &tidyStatus{
|
||||||
|
safetyBuffer: safetyBuffer,
|
||||||
|
tidyCertStore: tidyCertStore,
|
||||||
|
tidyRevokedCerts: tidyRevokedCerts,
|
||||||
|
state: tidyStatusStarted,
|
||||||
|
timeStarted: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, float32(b.tidyStatus.timeStarted.Unix()))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) tidyStatusStop(err error) {
|
||||||
|
b.tidyStatusLock.Lock()
|
||||||
|
defer b.tidyStatusLock.Unlock()
|
||||||
|
|
||||||
|
b.tidyStatus.timeFinished = time.Now()
|
||||||
|
b.tidyStatus.err = err
|
||||||
|
if err == nil {
|
||||||
|
b.tidyStatus.state = tidyStatusFinished
|
||||||
|
} else {
|
||||||
|
b.tidyStatus.state = tidyStatusError
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics.MeasureSince([]string{"secrets", "pki", "tidy", "duration"}, b.tidyStatus.timeStarted)
|
||||||
|
metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, 0)
|
||||||
|
metrics.IncrCounter([]string{"secrets", "pki", "tidy", "cert_store_deleted_count"}, float32(b.tidyStatus.certStoreDeletedCount))
|
||||||
|
metrics.IncrCounter([]string{"secrets", "pki", "tidy", "revoked_cert_deleted_count"}, float32(b.tidyStatus.revokedCertDeletedCount))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
metrics.IncrCounter([]string{"secrets", "pki", "tidy", "failure"}, 1)
|
||||||
|
} else {
|
||||||
|
metrics.IncrCounter([]string{"secrets", "pki", "tidy", "success"}, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) tidyStatusMessage(msg string) {
|
||||||
|
b.tidyStatusLock.Lock()
|
||||||
|
defer b.tidyStatusLock.Unlock()
|
||||||
|
|
||||||
|
b.tidyStatus.message = msg
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) tidyStatusIncCertStoreCount() {
|
||||||
|
b.tidyStatusLock.Lock()
|
||||||
|
defer b.tidyStatusLock.Unlock()
|
||||||
|
|
||||||
|
b.tidyStatus.certStoreDeletedCount++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) tidyStatusIncRevokedCertCount() {
|
||||||
|
b.tidyStatusLock.Lock()
|
||||||
|
defer b.tidyStatusLock.Unlock()
|
||||||
|
|
||||||
|
b.tidyStatus.revokedCertDeletedCount++
|
||||||
|
}
|
||||||
|
|
||||||
const pathTidyHelpSyn = `
|
const pathTidyHelpSyn = `
|
||||||
Tidy up the backend by removing expired certificates, revocation information,
|
Tidy up the backend by removing expired certificates, revocation information,
|
||||||
or both.
|
or both.
|
||||||
|
@ -239,3 +388,25 @@ certificate storage or in revocation information will then be checked. If the
|
||||||
current time, minus the value of 'safety_buffer', is greater than the
|
current time, minus the value of 'safety_buffer', is greater than the
|
||||||
expiration, it will be removed.
|
expiration, it will be removed.
|
||||||
`
|
`
|
||||||
|
|
||||||
|
const pathTidyStatusHelpSyn = `
|
||||||
|
Returns the status of the tidy operation.
|
||||||
|
`
|
||||||
|
|
||||||
|
const pathTidyStatusHelpDesc = `
|
||||||
|
This is a read only endpoint that returns information about the current tidy
|
||||||
|
operation, or the most recent if none is currently running.
|
||||||
|
|
||||||
|
The result includes the following fields:
|
||||||
|
* 'safety_buffer': the value of this parameter when initiating the tidy operation
|
||||||
|
* 'tidy_cert_store': the value of this parameter when initiating the tidy operation
|
||||||
|
* 'tidy_revoked_certs': the value of this parameter when initiating the tidy operation
|
||||||
|
* 'state': one of "Inactive", "Running", "Finished", "Error"
|
||||||
|
* 'error': the error message, if the operation ran into an error
|
||||||
|
* 'time_started': the time the operation started
|
||||||
|
* 'time_finished': the time the operation finished
|
||||||
|
* 'message': One of "Tidying certificate store: checking entry N of TOTAL" or
|
||||||
|
"Tidying revoked certificates: checking certificate N of TOTAL"
|
||||||
|
* 'cert_store_deleted_count': The number of certificate storage entries deleted
|
||||||
|
* 'revoked_cert_deleted_count': The number of revoked certificate entries deleted
|
||||||
|
`
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:feature:
|
||||||
|
secrets/pki: Add `tidy-status` endpoint to obtain information of the current or most recent tidy operation.
|
||||||
|
```
|
|
@ -1606,6 +1606,55 @@ $ curl \
|
||||||
http://127.0.0.1:8200/v1/pki/tidy
|
http://127.0.0.1:8200/v1/pki/tidy
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Tidy Status
|
||||||
|
|
||||||
|
This is a read only endpoint that returns information about the current tidy
|
||||||
|
operation, or the most recent if none are currently running.
|
||||||
|
|
||||||
|
The result includes the following fields:
|
||||||
|
* `safety_buffer`: the value of this parameter when initiating the tidy operation
|
||||||
|
* `tidy_cert_store`: the value of this parameter when initiating the tidy operation
|
||||||
|
* `tidy_revoked_certs`: the value of this parameter when initiating the tidy operation
|
||||||
|
* `state`: one of *Inactive*, *Running*, *Finished*, *Error*
|
||||||
|
* `error`: the error message, if the operation ran into an error
|
||||||
|
* `time_started`: the time the operation started
|
||||||
|
* `time_finished`: the time the operation finished
|
||||||
|
* `message`: One of *Tidying certificate store: checking entry N of TOTAL* or
|
||||||
|
*Tidying revoked certificates: checking certificate N of TOTAL*
|
||||||
|
* `cert_store_deleted_count`: The number of certificate storage entries deleted
|
||||||
|
* `revoked_cert_deleted_count`: The number of revoked certificate entries deleted
|
||||||
|
|
||||||
|
| Method | Path |
|
||||||
|
| :----- | :----------------- |
|
||||||
|
| `GET` | `/pki/tidy-status` |
|
||||||
|
|
||||||
|
### Sample Request
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
$ curl \
|
||||||
|
--header "X-Vault-Token: ..." \
|
||||||
|
--request GET \
|
||||||
|
http://127.0.0.1:8200/v1/pki/tidy-status
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Sample Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
"data": {
|
||||||
|
"safety_buffer": 60,
|
||||||
|
"tidy_cert_store": true,
|
||||||
|
"tidy_revoked_certs": true,
|
||||||
|
"error": null,
|
||||||
|
"message": "Tidying certificate store: checking entry 234 of 488",
|
||||||
|
"revoked_cert_deleted_count": 0,
|
||||||
|
"cert_store_deleted_count": 2,
|
||||||
|
"state": "Running",
|
||||||
|
"time_started": "2021-10-20T14:52:13.510161-04:00",
|
||||||
|
"time_finished": null
|
||||||
|
},
|
||||||
|
```
|
||||||
|
|
||||||
# Cluster Scalability
|
# Cluster Scalability
|
||||||
|
|
||||||
Most non-introspection operations in the PKI secrets engine require a write to
|
Most non-introspection operations in the PKI secrets engine require a write to
|
||||||
|
|
|
@ -310,6 +310,16 @@ These metrics relate to the supported [secrets engines][secrets-engines].
|
||||||
| `database.<name>.RevokeUser` | Time taken to revoke a user for the named database secrets engine `<name>`, for example: `database.postgresql-prod.RevokeUser` | ms | summary |
|
| `database.<name>.RevokeUser` | Time taken to revoke a user for the named database secrets engine `<name>`, for example: `database.postgresql-prod.RevokeUser` | ms | summary |
|
||||||
| `database.RevokeUser.error` | Number of user revocation operation errors across all database secrets engines | errors | counter |
|
| `database.RevokeUser.error` | Number of user revocation operation errors across all database secrets engines | errors | counter |
|
||||||
| `database.<name>.RevokeUser.error` | Number of user revocation operations for the named database secrets engine `<name>`, for example: `database.postgresql-prod.RevokeUser.error` | errors | counter |
|
| `database.<name>.RevokeUser.error` | Number of user revocation operations for the named database secrets engine `<name>`, for example: `database.postgresql-prod.RevokeUser.error` | errors | counter |
|
||||||
|
| `secrets.pki.tidy.cert_store_current_entry` | The index of the current entry in the certificate store being verified by the tidy operation | entry index | gauge |
|
||||||
|
| `secrets.pki.tidy.cert_store_deleted_count` | Number of entries deleted from the certificate store | entry | counter |
|
||||||
|
| `secrets.pki.tidy.cert_store_total_entries` | Number of entries in the certificate store to verify during the tidy operation | entry | gauge |
|
||||||
|
| `secrets.pki.tidy.duration` | Duration of time taken by the PKI tidy operation | ms | summary |
|
||||||
|
| `secrets.pki.tidy.failure` | Number of times the PKI tidy operation has not completed due to errors | operations | counter |
|
||||||
|
| `secrets.pki.tidy.revoked_cert_current_entry` | The index of the current revoked certificate entry in the certificate store being verified by the tidy operation | entry index | gauge |
|
||||||
|
| `secrets.pki.tidy.revoked_cert_deleted_count` | Number of entries deleted from the certificate store for revoked certificates | entry | counter |
|
||||||
|
| `secrets.pki.tidy.revoked_cert_total_entries` | Number of entries in the certificate store for revoked certificates to verify during the tidy operation | entry | gauge |
|
||||||
|
| `secrets.pki.tidy.start_time_epoch` | Start time (as seconds since Jan 1 1970) when the PKI tidy operation is active, 0 otherwise | seconds | gauge |
|
||||||
|
| `secrets.pki.tidy.success` | Number of times the PKI tidy operation has completed successfully | operations | counter |
|
||||||
| `vault.secret.kv.count` (cluster, namespace, mount_point) | Number of entries in each key-value secret engine. | paths | gauge |
|
| `vault.secret.kv.count` (cluster, namespace, mount_point) | Number of entries in each key-value secret engine. | paths | gauge |
|
||||||
| `vault.secret.lease.creation` (cluster, namespace, secret_engine, mount_point, creation_ttl) | Counts the number of leases created by secret engines. | leases | counter |
|
| `vault.secret.lease.creation` (cluster, namespace, secret_engine, mount_point, creation_ttl) | Counts the number of leases created by secret engines. | leases | counter |
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue