Telemetry Metrics Configuration. (#18186)
* Telemetry Metrics Configuration. * Err Shadowing Fix (woah, semgrep is cool). * Fix TestBackend_RevokePlusTidy_Intermediate * Add Changelog. * Fix memory leak. Code cleanup as suggested by Steve. * Turn off metrics by default, breaking-change. * Show on tidy-status before start-up. * Fix tests * make fmt * Add emit metrics to periodicFunc * Test not delivering unavailable metrics + fix. * Better error message. * Fixing the false-error bug. * make fmt. * Try to fix race issue, remove confusing comments. * Switch metric counter variables to an atomic.Uint32 - Switch the metric counter variables to an atomic variable type so that we are forced to properly load/store values to it * Fix race-issue better by trying until the metric is sunk. * make fmt. * empty commit to retrigger non-race tests that all pass locally --------- Co-authored-by: Steve Clark <steven.clark@hashicorp.com>
This commit is contained in:
parent
7dff0e6ae4
commit
14adb3b825
|
@ -9,12 +9,12 @@ import (
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
atomic2 "go.uber.org/atomic"
|
||||||
|
|
||||||
"github.com/hashicorp/vault/helper/constants"
|
"github.com/hashicorp/vault/helper/constants"
|
||||||
|
|
||||||
"github.com/hashicorp/go-multierror"
|
"github.com/hashicorp/go-multierror"
|
||||||
|
|
||||||
atomic2 "go.uber.org/atomic"
|
|
||||||
|
|
||||||
"github.com/hashicorp/vault/sdk/helper/consts"
|
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||||
|
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
|
@ -244,9 +244,12 @@ func Backend(conf *logical.BackendConfig) *backend {
|
||||||
b.lastTidy = time.Now()
|
b.lastTidy = time.Now()
|
||||||
|
|
||||||
// Metrics initialization for count of certificates in storage
|
// Metrics initialization for count of certificates in storage
|
||||||
|
b.certCountEnabled = atomic2.NewBool(false)
|
||||||
|
b.publishCertCountMetrics = atomic2.NewBool(false)
|
||||||
b.certsCounted = atomic2.NewBool(false)
|
b.certsCounted = atomic2.NewBool(false)
|
||||||
b.certCount = new(uint32)
|
b.certCountError = "Initialize Not Yet Run, Cert Counts Unavailable"
|
||||||
b.revokedCertCount = new(uint32)
|
b.certCount = &atomic.Uint32{}
|
||||||
|
b.revokedCertCount = &atomic.Uint32{}
|
||||||
b.possibleDoubleCountedSerials = make([]string, 0, 250)
|
b.possibleDoubleCountedSerials = make([]string, 0, 250)
|
||||||
b.possibleDoubleCountedRevokedSerials = make([]string, 0, 250)
|
b.possibleDoubleCountedRevokedSerials = make([]string, 0, 250)
|
||||||
|
|
||||||
|
@ -270,9 +273,12 @@ type backend struct {
|
||||||
|
|
||||||
unifiedTransferStatus *unifiedTransferStatus
|
unifiedTransferStatus *unifiedTransferStatus
|
||||||
|
|
||||||
certCount *uint32
|
certCountEnabled *atomic2.Bool
|
||||||
revokedCertCount *uint32
|
publishCertCountMetrics *atomic2.Bool
|
||||||
|
certCount *atomic.Uint32
|
||||||
|
revokedCertCount *atomic.Uint32
|
||||||
certsCounted *atomic2.Bool
|
certsCounted *atomic2.Bool
|
||||||
|
certCountError string
|
||||||
possibleDoubleCountedSerials []string
|
possibleDoubleCountedSerials []string
|
||||||
possibleDoubleCountedRevokedSerials []string
|
possibleDoubleCountedRevokedSerials []string
|
||||||
|
|
||||||
|
@ -364,9 +370,10 @@ func (b *backend) initialize(ctx context.Context, _ *logical.InitializationReque
|
||||||
// Initialize also needs to populate our certificate and revoked certificate count
|
// Initialize also needs to populate our certificate and revoked certificate count
|
||||||
err = b.initializeStoredCertificateCounts(ctx)
|
err = b.initializeStoredCertificateCounts(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
// Don't block/err initialize/startup for metrics. Context on this call can time out due to number of certificates.
|
||||||
|
b.Logger().Error("Could not initialize stored certificate counts", err)
|
||||||
|
b.certCountError = err.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -601,6 +608,13 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
|
||||||
crlErr := doCRL()
|
crlErr := doCRL()
|
||||||
tidyErr := doAutoTidy()
|
tidyErr := doAutoTidy()
|
||||||
|
|
||||||
|
// Periodically re-emit gauges so that they don't disappear/go stale
|
||||||
|
tidyConfig, err := sc.getAutoTidyConfig()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
b.emitCertStoreMetrics(tidyConfig)
|
||||||
|
|
||||||
var errors error
|
var errors error
|
||||||
if crlErr != nil {
|
if crlErr != nil {
|
||||||
errors = multierror.Append(errors, fmt.Errorf("Error building CRLs:\n - %w\n", crlErr))
|
errors = multierror.Append(errors, fmt.Errorf("Error building CRLs:\n - %w\n", crlErr))
|
||||||
|
@ -625,24 +639,51 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) initializeStoredCertificateCounts(ctx context.Context) error {
|
func (b *backend) initializeStoredCertificateCounts(ctx context.Context) error {
|
||||||
b.tidyStatusLock.RLock()
|
|
||||||
defer b.tidyStatusLock.RUnlock()
|
|
||||||
// For performance reasons, we can't lock on issuance/storage of certs until a list operation completes,
|
// For performance reasons, we can't lock on issuance/storage of certs until a list operation completes,
|
||||||
// but we want to limit possible miscounts / double-counts to over-counting, so we take the tidy lock which
|
// but we want to limit possible miscounts / double-counts to over-counting, so we take the tidy lock which
|
||||||
// prevents (most) deletions - in particular we take a read lock (sufficient to block the write lock in
|
// prevents (most) deletions - in particular we take a read lock (sufficient to block the write lock in
|
||||||
// tidyStatusStart while allowing tidy to still acquire a read lock to report via its endpoint)
|
// tidyStatusStart while allowing tidy to still acquire a read lock to report via its endpoint)
|
||||||
|
b.tidyStatusLock.RLock()
|
||||||
|
defer b.tidyStatusLock.RUnlock()
|
||||||
|
sc := b.makeStorageContext(ctx, b.storage)
|
||||||
|
config, err := sc.getAutoTidyConfig()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
b.certCountEnabled.Store(config.MaintainCount)
|
||||||
|
b.publishCertCountMetrics.Store(config.PublishMetrics)
|
||||||
|
|
||||||
|
if config.MaintainCount == false {
|
||||||
|
b.possibleDoubleCountedRevokedSerials = nil
|
||||||
|
b.possibleDoubleCountedSerials = nil
|
||||||
|
b.certsCounted.Store(true)
|
||||||
|
b.certCount.Store(0)
|
||||||
|
b.revokedCertCount.Store(0)
|
||||||
|
b.certCountError = "Cert Count is Disabled: enable via Tidy Config maintain_stored_certificate_counts"
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ideally these three things would be set in one transaction, since that isn't possible, set the counts to "0",
|
||||||
|
// first, so count will over-count (and miss putting things in deduplicate queue), rather than under-count.
|
||||||
|
b.certCount.Store(0)
|
||||||
|
b.revokedCertCount.Store(0)
|
||||||
|
b.possibleDoubleCountedRevokedSerials = nil
|
||||||
|
b.possibleDoubleCountedSerials = nil
|
||||||
|
// A cert issued or revoked here will be double-counted. That's okay, this is "best effort" metrics.
|
||||||
|
b.certsCounted.Store(false)
|
||||||
|
|
||||||
entries, err := b.storage.List(ctx, "certs/")
|
entries, err := b.storage.List(ctx, "certs/")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
atomic.AddUint32(b.certCount, uint32(len(entries)))
|
b.certCount.Add(uint32(len(entries)))
|
||||||
|
|
||||||
revokedEntries, err := b.storage.List(ctx, "revoked/")
|
revokedEntries, err := b.storage.List(ctx, "revoked/")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
atomic.AddUint32(b.revokedCertCount, uint32(len(revokedEntries)))
|
b.revokedCertCount.Add(uint32(len(revokedEntries)))
|
||||||
|
|
||||||
b.certsCounted.Store(true)
|
b.certsCounted.Store(true)
|
||||||
// Now that the metrics are set, we can switch from appending newly-stored certificates to the possible double-count
|
// Now that the metrics are set, we can switch from appending newly-stored certificates to the possible double-count
|
||||||
|
@ -723,64 +764,98 @@ func (b *backend) initializeStoredCertificateCounts(ctx context.Context) error {
|
||||||
b.possibleDoubleCountedRevokedSerials = nil
|
b.possibleDoubleCountedRevokedSerials = nil
|
||||||
b.possibleDoubleCountedSerials = nil
|
b.possibleDoubleCountedSerials = nil
|
||||||
|
|
||||||
certCount := atomic.LoadUint32(b.certCount)
|
b.emitCertStoreMetrics(config)
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(certCount))
|
|
||||||
revokedCertCount := atomic.LoadUint32(b.revokedCertCount)
|
b.certCountError = ""
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(revokedCertCount))
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
|
func (b *backend) emitCertStoreMetrics(config *tidyConfig) {
|
||||||
// eg. certsCounted := b.certsCounted.Load()
|
if config.PublishMetrics == true {
|
||||||
func (b *backend) incrementTotalCertificatesCount(certsCounted bool, newSerial string) {
|
certCount := b.certCount.Load()
|
||||||
certCount := atomic.AddUint32(b.certCount, 1)
|
b.emitTotalCertCountMetric(certCount)
|
||||||
switch {
|
revokedCertCount := b.revokedCertCount.Load()
|
||||||
case !certsCounted:
|
b.emitTotalRevokedCountMetric(revokedCertCount)
|
||||||
// This is unsafe, but a good best-attempt
|
|
||||||
if strings.HasPrefix(newSerial, "certs/") {
|
|
||||||
newSerial = newSerial[6:]
|
|
||||||
}
|
|
||||||
b.possibleDoubleCountedSerials = append(b.possibleDoubleCountedSerials, newSerial)
|
|
||||||
default:
|
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(certCount))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) decrementTotalCertificatesCountReport() {
|
// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
|
||||||
certCount := b.decrementTotalCertificatesCountNoReport()
|
// eg. certsCounted := b.certsCounted.Load()
|
||||||
|
func (b *backend) ifCountEnabledIncrementTotalCertificatesCount(certsCounted bool, newSerial string) {
|
||||||
|
if b.certCountEnabled.Load() {
|
||||||
|
certCount := b.certCount.Add(1)
|
||||||
|
switch {
|
||||||
|
case !certsCounted:
|
||||||
|
// This is unsafe, but a good best-attempt
|
||||||
|
if strings.HasPrefix(newSerial, "certs/") {
|
||||||
|
newSerial = newSerial[6:]
|
||||||
|
}
|
||||||
|
b.possibleDoubleCountedSerials = append(b.possibleDoubleCountedSerials, newSerial)
|
||||||
|
default:
|
||||||
|
if b.publishCertCountMetrics.Load() {
|
||||||
|
b.emitTotalCertCountMetric(certCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) ifCountEnabledDecrementTotalCertificatesCountReport() {
|
||||||
|
if b.certCountEnabled.Load() {
|
||||||
|
certCount := b.decrementTotalCertificatesCountNoReport()
|
||||||
|
if b.publishCertCountMetrics.Load() {
|
||||||
|
b.emitTotalCertCountMetric(certCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) emitTotalCertCountMetric(certCount uint32) {
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(certCount))
|
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(certCount))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
|
// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
|
||||||
|
// Does not respect whether-we-are-counting backend information.
|
||||||
func (b *backend) decrementTotalCertificatesCountNoReport() uint32 {
|
func (b *backend) decrementTotalCertificatesCountNoReport() uint32 {
|
||||||
newCount := atomic.AddUint32(b.certCount, ^uint32(0))
|
newCount := b.certCount.Add(^uint32(0))
|
||||||
return newCount
|
return newCount
|
||||||
}
|
}
|
||||||
|
|
||||||
// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
|
// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
|
||||||
// eg. certsCounted := b.certsCounted.Load()
|
// eg. certsCounted := b.certsCounted.Load()
|
||||||
func (b *backend) incrementTotalRevokedCertificatesCount(certsCounted bool, newSerial string) {
|
func (b *backend) ifCountEnabledIncrementTotalRevokedCertificatesCount(certsCounted bool, newSerial string) {
|
||||||
newRevokedCertCount := atomic.AddUint32(b.revokedCertCount, 1)
|
if b.certCountEnabled.Load() {
|
||||||
switch {
|
newRevokedCertCount := b.revokedCertCount.Add(1)
|
||||||
case !certsCounted:
|
switch {
|
||||||
// This is unsafe, but a good best-attempt
|
case !certsCounted:
|
||||||
if strings.HasPrefix(newSerial, "revoked/") { // allow passing in the path (revoked/serial) OR the serial
|
// This is unsafe, but a good best-attempt
|
||||||
newSerial = newSerial[8:]
|
if strings.HasPrefix(newSerial, "revoked/") { // allow passing in the path (revoked/serial) OR the serial
|
||||||
|
newSerial = newSerial[8:]
|
||||||
|
}
|
||||||
|
b.possibleDoubleCountedRevokedSerials = append(b.possibleDoubleCountedRevokedSerials, newSerial)
|
||||||
|
default:
|
||||||
|
if b.publishCertCountMetrics.Load() {
|
||||||
|
b.emitTotalRevokedCountMetric(newRevokedCertCount)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
b.possibleDoubleCountedRevokedSerials = append(b.possibleDoubleCountedRevokedSerials, newSerial)
|
|
||||||
default:
|
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(newRevokedCertCount))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) decrementTotalRevokedCertificatesCountReport() {
|
func (b *backend) ifCountEnabledDecrementTotalRevokedCertificatesCountReport() {
|
||||||
revokedCertCount := b.decrementTotalRevokedCertificatesCountNoReport()
|
if b.certCountEnabled.Load() {
|
||||||
|
revokedCertCount := b.decrementTotalRevokedCertificatesCountNoReport()
|
||||||
|
if b.publishCertCountMetrics.Load() {
|
||||||
|
b.emitTotalRevokedCountMetric(revokedCertCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *backend) emitTotalRevokedCountMetric(revokedCertCount uint32) {
|
||||||
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(revokedCertCount))
|
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(revokedCertCount))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
|
// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
|
||||||
|
// Does not respect whether-we-are-counting backend information.
|
||||||
func (b *backend) decrementTotalRevokedCertificatesCountNoReport() uint32 {
|
func (b *backend) decrementTotalRevokedCertificatesCountNoReport() uint32 {
|
||||||
newRevokedCertCount := atomic.AddUint32(b.revokedCertCount, ^uint32(0))
|
newRevokedCertCount := b.revokedCertCount.Add(^uint32(0))
|
||||||
return newRevokedCertCount
|
return newRevokedCertCount
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,6 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -3797,6 +3796,15 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set up Metric Configuration, then restart to enable it
|
||||||
|
_, err = client.Logical().Write("pki/config/auto-tidy", map[string]interface{}{
|
||||||
|
"maintain_stored_certificate_counts": true,
|
||||||
|
"publish_stored_certificate_count_metrics": true,
|
||||||
|
})
|
||||||
|
_, err = client.Logical().Write("/sys/plugins/reload/backend", map[string]interface{}{
|
||||||
|
"mounts": "pki/",
|
||||||
|
})
|
||||||
|
|
||||||
// Check the metrics initialized in order to calculate backendUUID for /pki
|
// Check the metrics initialized in order to calculate backendUUID for /pki
|
||||||
// BackendUUID not consistent during tests with UUID from /sys/mounts/pki
|
// BackendUUID not consistent during tests with UUID from /sys/mounts/pki
|
||||||
metricsSuffix := "total_certificates_stored"
|
metricsSuffix := "total_certificates_stored"
|
||||||
|
@ -3833,6 +3841,14 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
// Set up Metric Configuration, then restart to enable it
|
||||||
|
_, err = client.Logical().Write("pki2/config/auto-tidy", map[string]interface{}{
|
||||||
|
"maintain_stored_certificate_counts": true,
|
||||||
|
"publish_stored_certificate_count_metrics": true,
|
||||||
|
})
|
||||||
|
_, err = client.Logical().Write("/sys/plugins/reload/backend", map[string]interface{}{
|
||||||
|
"mounts": "pki2/",
|
||||||
|
})
|
||||||
|
|
||||||
// Create a CSR for the intermediate CA
|
// Create a CSR for the intermediate CA
|
||||||
secret, err := client.Logical().Write("pki2/intermediate/generate/internal", nil)
|
secret, err := client.Logical().Write("pki2/intermediate/generate/internal", nil)
|
||||||
|
@ -3961,6 +3977,7 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) {
|
||||||
"current_revoked_cert_count": json.Number("0"),
|
"current_revoked_cert_count": json.Number("0"),
|
||||||
"revocation_queue_deleted_count": json.Number("0"),
|
"revocation_queue_deleted_count": json.Number("0"),
|
||||||
"cross_revoked_cert_deleted_count": json.Number("0"),
|
"cross_revoked_cert_deleted_count": json.Number("0"),
|
||||||
|
"internal_backend_uuid": backendUUID,
|
||||||
}
|
}
|
||||||
// Let's copy the times from the response so that we can use deep.Equal()
|
// Let's copy the times from the response so that we can use deep.Equal()
|
||||||
timeStarted, ok := tidyStatus.Data["time_started"]
|
timeStarted, ok := tidyStatus.Data["time_started"]
|
||||||
|
@ -5590,6 +5607,14 @@ func TestBackend_InitializeCertificateCounts(t *testing.T) {
|
||||||
serials[i] = resp.Data["serial_number"].(string)
|
serials[i] = resp.Data["serial_number"].(string)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turn on certificate counting:
|
||||||
|
CBWrite(b, s, "config/auto-tidy", map[string]interface{}{
|
||||||
|
"maintain_stored_certificate_counts": true,
|
||||||
|
"publish_stored_certificate_count_metrics": false,
|
||||||
|
})
|
||||||
|
// Assert initialize from clean is correct:
|
||||||
|
b.initializeStoredCertificateCounts(ctx)
|
||||||
|
|
||||||
// Revoke certificates A + B
|
// Revoke certificates A + B
|
||||||
revocations := serials[0:2]
|
revocations := serials[0:2]
|
||||||
for _, key := range revocations {
|
for _, key := range revocations {
|
||||||
|
@ -5601,18 +5626,16 @@ func TestBackend_InitializeCertificateCounts(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert initialize from clean is correct:
|
if b.certCount.Load() != 6 {
|
||||||
b.initializeStoredCertificateCounts(ctx)
|
t.Fatalf("Failed to count six certificates root,A,B,C,D,E, instead counted %d certs", b.certCount.Load())
|
||||||
if atomic.LoadUint32(b.certCount) != 6 {
|
|
||||||
t.Fatalf("Failed to count six certificates root,A,B,C,D,E, instead counted %d certs", atomic.LoadUint32(b.certCount))
|
|
||||||
}
|
}
|
||||||
if atomic.LoadUint32(b.revokedCertCount) != 2 {
|
if b.revokedCertCount.Load() != 2 {
|
||||||
t.Fatalf("Failed to count two revoked certificates A+B, instead counted %d certs", atomic.LoadUint32(b.revokedCertCount))
|
t.Fatalf("Failed to count two revoked certificates A+B, instead counted %d certs", b.revokedCertCount.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Simulates listing while initialize in progress, by "restarting it"
|
// Simulates listing while initialize in progress, by "restarting it"
|
||||||
atomic.StoreUint32(b.certCount, 0)
|
b.certCount.Store(0)
|
||||||
atomic.StoreUint32(b.revokedCertCount, 0)
|
b.revokedCertCount.Store(0)
|
||||||
b.certsCounted.Store(false)
|
b.certsCounted.Store(false)
|
||||||
|
|
||||||
// Revoke certificates C, D
|
// Revoke certificates C, D
|
||||||
|
@ -5641,12 +5664,12 @@ func TestBackend_InitializeCertificateCounts(t *testing.T) {
|
||||||
b.initializeStoredCertificateCounts(ctx)
|
b.initializeStoredCertificateCounts(ctx)
|
||||||
|
|
||||||
// Test certificate count
|
// Test certificate count
|
||||||
if atomic.LoadUint32(b.certCount) != 8 {
|
if b.certCount.Load() != 8 {
|
||||||
t.Fatalf("Failed to initialize count of certificates root, A,B,C,D,E,F,G counted %d certs", *(b.certCount))
|
t.Fatalf("Failed to initialize count of certificates root, A,B,C,D,E,F,G counted %d certs", b.certCount.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
if atomic.LoadUint32(b.revokedCertCount) != 4 {
|
if b.revokedCertCount.Load() != 4 {
|
||||||
t.Fatalf("Failed to count revoked certificates A,B,C,D counted %d certs", *(b.revokedCertCount))
|
t.Fatalf("Failed to count revoked certificates A,B,C,D counted %d certs", b.revokedCertCount.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
|
@ -234,9 +234,9 @@ func fetchCertBySerial(sc *storageContext, prefix, serial string) (*logical.Stor
|
||||||
// If we fail here, we have an extra (copy) of a cert in storage, add to metrics:
|
// If we fail here, we have an extra (copy) of a cert in storage, add to metrics:
|
||||||
switch {
|
switch {
|
||||||
case strings.HasPrefix(prefix, "revoked/"):
|
case strings.HasPrefix(prefix, "revoked/"):
|
||||||
sc.Backend.incrementTotalRevokedCertificatesCount(certsCounted, path)
|
sc.Backend.ifCountEnabledIncrementTotalRevokedCertificatesCount(certsCounted, path)
|
||||||
default:
|
default:
|
||||||
sc.Backend.incrementTotalCertificatesCount(certsCounted, path)
|
sc.Backend.ifCountEnabledIncrementTotalCertificatesCount(certsCounted, path)
|
||||||
}
|
}
|
||||||
return nil, errutil.InternalError{Err: fmt.Sprintf("error deleting certificate with serial %s from old location", serial)}
|
return nil, errutil.InternalError{Err: fmt.Sprintf("error deleting certificate with serial %s from old location", serial)}
|
||||||
}
|
}
|
||||||
|
|
|
@ -978,7 +978,7 @@ func revokeCert(sc *storageContext, config *crlConfig, cert *x509.Certificate) (
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error saving revoked certificate to new location")
|
return nil, fmt.Errorf("error saving revoked certificate to new location")
|
||||||
}
|
}
|
||||||
sc.Backend.incrementTotalRevokedCertificatesCount(certsCounted, revEntry.Key)
|
sc.Backend.ifCountEnabledIncrementTotalRevokedCertificatesCount(certsCounted, revEntry.Key)
|
||||||
|
|
||||||
// If this flag is enabled after the fact, existing local entries will be published to
|
// If this flag is enabled after the fact, existing local entries will be published to
|
||||||
// the unified storage space through a periodic function.
|
// the unified storage space through a periodic function.
|
||||||
|
|
|
@ -518,6 +518,23 @@ greater period of time. By default this is zero seconds.`,
|
||||||
Default: "0s",
|
Default: "0s",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fields["maintain_stored_certificate_counts"] = &framework.FieldSchema{
|
||||||
|
Type: framework.TypeBool,
|
||||||
|
Description: `This configures whether stored certificates
|
||||||
|
are counted upon initialization of the backend, and whether during
|
||||||
|
normal operation, a running count of certificates stored is maintained.`,
|
||||||
|
Default: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
fields["publish_stored_certificate_count_metrics"] = &framework.FieldSchema{
|
||||||
|
Type: framework.TypeBool,
|
||||||
|
Description: `This configures whether the stored certificate
|
||||||
|
count is published to the metrics consumer. It does not affect if the
|
||||||
|
stored certificate count is maintained, and if maintained, it will be
|
||||||
|
available on the tidy-status endpoint.`,
|
||||||
|
Default: false,
|
||||||
|
}
|
||||||
|
|
||||||
fields["tidy_revocation_queue"] = &framework.FieldSchema{
|
fields["tidy_revocation_queue"] = &framework.FieldSchema{
|
||||||
Type: framework.TypeBool,
|
Type: framework.TypeBool,
|
||||||
Description: `Set to true to remove stale revocation queue entries
|
Description: `Set to true to remove stale revocation queue entries
|
||||||
|
|
|
@ -419,7 +419,7 @@ func (b *backend) pathIssueSignCert(ctx context.Context, req *logical.Request, d
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
||||||
}
|
}
|
||||||
b.incrementTotalCertificatesCount(certsCounted, key)
|
b.ifCountEnabledIncrementTotalCertificatesCount(certsCounted, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
if useCSR {
|
if useCSR {
|
||||||
|
|
|
@ -271,7 +271,7 @@ func (b *backend) pathCAGenerateRoot(ctx context.Context, req *logical.Request,
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
||||||
}
|
}
|
||||||
b.incrementTotalCertificatesCount(certsCounted, key)
|
b.ifCountEnabledIncrementTotalCertificatesCount(certsCounted, key)
|
||||||
|
|
||||||
// Build a fresh CRL
|
// Build a fresh CRL
|
||||||
err = b.crlBuilder.rebuild(sc, true)
|
err = b.crlBuilder.rebuild(sc, true)
|
||||||
|
@ -468,7 +468,7 @@ func (b *backend) pathIssuerSignIntermediate(ctx context.Context, req *logical.R
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
return nil, fmt.Errorf("unable to store certificate locally: %w", err)
|
||||||
}
|
}
|
||||||
b.incrementTotalCertificatesCount(certsCounted, key)
|
b.ifCountEnabledIncrementTotalCertificatesCount(certsCounted, key)
|
||||||
|
|
||||||
if parsedBundle.Certificate.MaxPathLen == 0 {
|
if parsedBundle.Certificate.MaxPathLen == 0 {
|
||||||
resp.AddWarning("Max path length of the signed certificate is zero. This certificate cannot be used to issue intermediate CA certificates.")
|
resp.AddWarning("Max path length of the signed certificate is zero. This certificate cannot be used to issue intermediate CA certificates.")
|
||||||
|
|
|
@ -70,6 +70,8 @@ type tidyConfig struct {
|
||||||
SafetyBuffer time.Duration `json:"safety_buffer"`
|
SafetyBuffer time.Duration `json:"safety_buffer"`
|
||||||
IssuerSafetyBuffer time.Duration `json:"issuer_safety_buffer"`
|
IssuerSafetyBuffer time.Duration `json:"issuer_safety_buffer"`
|
||||||
PauseDuration time.Duration `json:"pause_duration"`
|
PauseDuration time.Duration `json:"pause_duration"`
|
||||||
|
MaintainCount bool `json:"maintain_stored_certificate_counts"`
|
||||||
|
PublishMetrics bool `json:"publish_stored_certificate_count_metrics"`
|
||||||
RevocationQueue bool `json:"tidy_revocation_queue"`
|
RevocationQueue bool `json:"tidy_revocation_queue"`
|
||||||
QueueSafetyBuffer time.Duration `json:"revocation_queue_safety_buffer"`
|
QueueSafetyBuffer time.Duration `json:"revocation_queue_safety_buffer"`
|
||||||
CrossRevokedCerts bool `json:"tidy_cross_cluster_revoked_certs"`
|
CrossRevokedCerts bool `json:"tidy_cross_cluster_revoked_certs"`
|
||||||
|
@ -86,6 +88,8 @@ var defaultTidyConfig = tidyConfig{
|
||||||
SafetyBuffer: 72 * time.Hour,
|
SafetyBuffer: 72 * time.Hour,
|
||||||
IssuerSafetyBuffer: 365 * 24 * time.Hour,
|
IssuerSafetyBuffer: 365 * 24 * time.Hour,
|
||||||
PauseDuration: 0 * time.Second,
|
PauseDuration: 0 * time.Second,
|
||||||
|
MaintainCount: false,
|
||||||
|
PublishMetrics: false,
|
||||||
RevocationQueue: false,
|
RevocationQueue: false,
|
||||||
QueueSafetyBuffer: 48 * time.Hour,
|
QueueSafetyBuffer: 48 * time.Hour,
|
||||||
CrossRevokedCerts: false,
|
CrossRevokedCerts: false,
|
||||||
|
@ -973,11 +977,26 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
|
||||||
"missing_issuer_cert_count": nil,
|
"missing_issuer_cert_count": nil,
|
||||||
"current_cert_store_count": nil,
|
"current_cert_store_count": nil,
|
||||||
"current_revoked_cert_count": nil,
|
"current_revoked_cert_count": nil,
|
||||||
|
"internal_backend_uuid": nil,
|
||||||
"revocation_queue_deleted_count": nil,
|
"revocation_queue_deleted_count": nil,
|
||||||
"cross_revoked_cert_deleted_count": nil,
|
"cross_revoked_cert_deleted_count": nil,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resp.Data["internal_backend_uuid"] = b.backendUUID
|
||||||
|
|
||||||
|
if b.certCountEnabled.Load() {
|
||||||
|
resp.Data["current_cert_store_count"] = b.certCount.Load()
|
||||||
|
resp.Data["current_revoked_cert_count"] = b.revokedCertCount.Load()
|
||||||
|
if !b.certsCounted.Load() {
|
||||||
|
resp.AddWarning("Certificates in storage are still being counted, current counts provided may be " +
|
||||||
|
"inaccurate")
|
||||||
|
}
|
||||||
|
if b.certCountError != "" {
|
||||||
|
resp.Data["certificate_counting_error"] = b.certCountError
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if b.tidyStatus.state == tidyStatusInactive {
|
if b.tidyStatus.state == tidyStatusInactive {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
@ -1020,14 +1039,6 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
|
||||||
resp.Data["time_finished"] = b.tidyStatus.timeFinished
|
resp.Data["time_finished"] = b.tidyStatus.timeFinished
|
||||||
}
|
}
|
||||||
|
|
||||||
resp.Data["current_cert_store_count"] = atomic.LoadUint32(b.certCount)
|
|
||||||
resp.Data["current_revoked_cert_count"] = atomic.LoadUint32(b.revokedCertCount)
|
|
||||||
|
|
||||||
if !b.certsCounted.Load() {
|
|
||||||
resp.AddWarning("Certificates in storage are still being counted, current counts provided may be " +
|
|
||||||
"inaccurate")
|
|
||||||
}
|
|
||||||
|
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1040,19 +1051,21 @@ func (b *backend) pathConfigAutoTidyRead(ctx context.Context, req *logical.Reque
|
||||||
|
|
||||||
return &logical.Response{
|
return &logical.Response{
|
||||||
Data: map[string]interface{}{
|
Data: map[string]interface{}{
|
||||||
"enabled": config.Enabled,
|
"enabled": config.Enabled,
|
||||||
"interval_duration": int(config.Interval / time.Second),
|
"interval_duration": int(config.Interval / time.Second),
|
||||||
"tidy_cert_store": config.CertStore,
|
"tidy_cert_store": config.CertStore,
|
||||||
"tidy_revoked_certs": config.RevokedCerts,
|
"tidy_revoked_certs": config.RevokedCerts,
|
||||||
"tidy_revoked_cert_issuer_associations": config.IssuerAssocs,
|
"tidy_revoked_cert_issuer_associations": config.IssuerAssocs,
|
||||||
"tidy_expired_issuers": config.ExpiredIssuers,
|
"tidy_expired_issuers": config.ExpiredIssuers,
|
||||||
"tidy_move_legacy_ca_bundle": config.BackupBundle,
|
"safety_buffer": int(config.SafetyBuffer / time.Second),
|
||||||
"safety_buffer": int(config.SafetyBuffer / time.Second),
|
"issuer_safety_buffer": int(config.IssuerSafetyBuffer / time.Second),
|
||||||
"issuer_safety_buffer": int(config.IssuerSafetyBuffer / time.Second),
|
"pause_duration": config.PauseDuration.String(),
|
||||||
"pause_duration": config.PauseDuration.String(),
|
"publish_stored_certificate_count_metrics": config.PublishMetrics,
|
||||||
"tidy_revocation_queue": config.RevocationQueue,
|
"maintain_stored_certificate_counts": config.MaintainCount,
|
||||||
"revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second),
|
"tidy_move_legacy_ca_bundle": config.BackupBundle,
|
||||||
"tidy_cross_cluster_revoked_certs": config.CrossRevokedCerts,
|
"tidy_revocation_queue": config.RevocationQueue,
|
||||||
|
"revocation_queue_safety_buffer": int(config.QueueSafetyBuffer / time.Second),
|
||||||
|
"tidy_cross_cluster_revoked_certs": config.CrossRevokedCerts,
|
||||||
},
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
@ -1139,6 +1152,17 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
|
||||||
return logical.ErrorResponse("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations / tidy_expired_issuers / tidy_move_legacy_ca_bundle / tidy_revocation_queue / tidy_cross_cluster_revoked_certs)."), nil
|
return logical.ErrorResponse("Auto-tidy enabled but no tidy operations were requested. Enable at least one tidy operation to be run (tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations / tidy_expired_issuers / tidy_move_legacy_ca_bundle / tidy_revocation_queue / tidy_cross_cluster_revoked_certs)."), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if maintainCountEnabledRaw, ok := d.GetOk("maintain_stored_certificate_counts"); ok {
|
||||||
|
config.MaintainCount = maintainCountEnabledRaw.(bool)
|
||||||
|
}
|
||||||
|
|
||||||
|
if runningStorageMetricsEnabledRaw, ok := d.GetOk("publish_stored_certificate_count_metrics"); ok {
|
||||||
|
if config.MaintainCount == false {
|
||||||
|
return logical.ErrorResponse("Can not publish a running storage metrics count to metrics without first maintaining that count. Enable `maintain_stored_certificate_counts` to enable `publish_stored_certificate_count_metrics."), nil
|
||||||
|
}
|
||||||
|
config.PublishMetrics = runningStorageMetricsEnabledRaw.(bool)
|
||||||
|
}
|
||||||
|
|
||||||
if err := sc.writeAutoTidyConfig(config); err != nil {
|
if err := sc.writeAutoTidyConfig(config); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -1224,7 +1248,7 @@ func (b *backend) tidyStatusIncCertStoreCount() {
|
||||||
|
|
||||||
b.tidyStatus.certStoreDeletedCount++
|
b.tidyStatus.certStoreDeletedCount++
|
||||||
|
|
||||||
b.decrementTotalCertificatesCountReport()
|
b.ifCountEnabledDecrementTotalCertificatesCountReport()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) tidyStatusIncRevokedCertCount() {
|
func (b *backend) tidyStatusIncRevokedCertCount() {
|
||||||
|
@ -1233,7 +1257,7 @@ func (b *backend) tidyStatusIncRevokedCertCount() {
|
||||||
|
|
||||||
b.tidyStatus.revokedCertDeletedCount++
|
b.tidyStatus.revokedCertDeletedCount++
|
||||||
|
|
||||||
b.decrementTotalRevokedCertificatesCountReport()
|
b.ifCountEnabledDecrementTotalRevokedCertificatesCountReport()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *backend) tidyStatusIncMissingIssuerCertCount() {
|
func (b *backend) tidyStatusIncMissingIssuerCertCount() {
|
||||||
|
|
|
@ -2,9 +2,15 @@ package pki
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/vault/helper/testhelpers"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics"
|
||||||
|
|
||||||
"github.com/hashicorp/vault/api"
|
"github.com/hashicorp/vault/api"
|
||||||
vaulthttp "github.com/hashicorp/vault/http"
|
vaulthttp "github.com/hashicorp/vault/http"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
|
@ -405,3 +411,338 @@ func TestTidyIssuerConfig(t *testing.T) {
|
||||||
require.Equal(t, true, resp.Data["tidy_expired_issuers"])
|
require.Equal(t, true, resp.Data["tidy_expired_issuers"])
|
||||||
require.Equal(t, 5, resp.Data["issuer_safety_buffer"])
|
require.Equal(t, 5, resp.Data["issuer_safety_buffer"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestCertStorageMetrics ensures that when enabled, metrics are able to count the number of certificates in storage and
|
||||||
|
// number of revoked certificates in storage. Moreover, this test ensures that the gauge is emitted periodically, so
|
||||||
|
// that the metric does not disappear or go stale.
|
||||||
|
func TestCertStorageMetrics(t *testing.T) {
|
||||||
|
// This tests uses the same setup as TestAutoTidy
|
||||||
|
newPeriod := 1 * time.Second
|
||||||
|
|
||||||
|
// We set up a metrics accumulator
|
||||||
|
inmemSink := metrics.NewInmemSink(
|
||||||
|
2*newPeriod, // A short time period is ideal here to test metrics are emitted every periodic func
|
||||||
|
2000000*time.Hour)
|
||||||
|
|
||||||
|
metricsConf := metrics.DefaultConfig("")
|
||||||
|
metricsConf.EnableHostname = false
|
||||||
|
metricsConf.EnableHostnameLabel = false
|
||||||
|
metricsConf.EnableServiceLabel = false
|
||||||
|
metricsConf.EnableTypePrefix = false
|
||||||
|
|
||||||
|
_, err := metrics.NewGlobal(metricsConf, inmemSink)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This test requires the periodicFunc to trigger, which requires we stand
|
||||||
|
// up a full test cluster.
|
||||||
|
coreConfig := &vault.CoreConfig{
|
||||||
|
LogicalBackends: map[string]logical.Factory{
|
||||||
|
"pki": Factory,
|
||||||
|
},
|
||||||
|
// See notes below about usage of /sys/raw for reading cluster
|
||||||
|
// storage without barrier encryption.
|
||||||
|
EnableRaw: true,
|
||||||
|
RollbackPeriod: newPeriod,
|
||||||
|
}
|
||||||
|
cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{
|
||||||
|
HandlerFunc: vaulthttp.Handler,
|
||||||
|
})
|
||||||
|
cluster.Start()
|
||||||
|
defer cluster.Cleanup()
|
||||||
|
client := cluster.Cores[0].Client
|
||||||
|
|
||||||
|
// Mount PKI
|
||||||
|
err = client.Sys().Mount("pki", &api.MountInput{
|
||||||
|
Type: "pki",
|
||||||
|
Config: api.MountConfigInput{
|
||||||
|
DefaultLeaseTTL: "10m",
|
||||||
|
MaxLeaseTTL: "60m",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Generate root.
|
||||||
|
resp, err := client.Logical().Write("pki/root/generate/internal", map[string]interface{}{
|
||||||
|
"ttl": "40h",
|
||||||
|
"common_name": "Root X1",
|
||||||
|
"key_type": "ec",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp)
|
||||||
|
require.NotEmpty(t, resp.Data)
|
||||||
|
require.NotEmpty(t, resp.Data["issuer_id"])
|
||||||
|
|
||||||
|
// Set up a testing role.
|
||||||
|
_, err = client.Logical().Write("pki/roles/local-testing", map[string]interface{}{
|
||||||
|
"allow_any_name": true,
|
||||||
|
"enforce_hostnames": false,
|
||||||
|
"key_type": "ec",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Run tidy so that tidy-status is not empty
|
||||||
|
_, err = client.Logical().Write("pki/tidy", map[string]interface{}{
|
||||||
|
"tidy_revoked_certs": true,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Since certificate counts are off by default, we shouldn't see counts in the tidy status
|
||||||
|
tidyStatus, err := client.Logical().Read("pki/tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// backendUUID should exist, we need this for metrics
|
||||||
|
backendUUID := tidyStatus.Data["internal_backend_uuid"].(string)
|
||||||
|
// "current_cert_store_count", "current_revoked_cert_count"
|
||||||
|
countData, ok := tidyStatus.Data["current_cert_store_count"]
|
||||||
|
if ok && countData != nil {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but current cert store count %v appeared in tidy status in unconfigured mount", countData)
|
||||||
|
}
|
||||||
|
revokedCountData, ok := tidyStatus.Data["current_revoked_cert_count"]
|
||||||
|
if ok && revokedCountData != nil {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but revoked cert count %v appeared in tidy status in unconfigured mount", revokedCountData)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since certificate counts are off by default, those metrics should not exist yet
|
||||||
|
mostRecentInterval := inmemSink.Data()[len(inmemSink.Data())-1]
|
||||||
|
_, ok = mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_revoked_certificates_stored"]
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but revoked cert count was emitted as a metric in an unconfigured mount")
|
||||||
|
}
|
||||||
|
_, ok = mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_certificates_stored"]
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but total certificate count was emitted as a metric in an unconfigured mount")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the auto-tidy config.
|
||||||
|
_, err = client.Logical().Write("pki/config/auto-tidy", map[string]interface{}{
|
||||||
|
"enabled": true,
|
||||||
|
"interval_duration": "1s",
|
||||||
|
"tidy_cert_store": true,
|
||||||
|
"tidy_revoked_certs": true,
|
||||||
|
"safety_buffer": "1s",
|
||||||
|
"maintain_stored_certificate_counts": true,
|
||||||
|
"publish_stored_certificate_count_metrics": false,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Reload the Mount - Otherwise Stored Certificate Counts Will Not Be Populated
|
||||||
|
_, err = client.Logical().Write("/sys/plugins/reload/backend", map[string]interface{}{
|
||||||
|
"plugin": "pki",
|
||||||
|
})
|
||||||
|
|
||||||
|
// By reading the auto-tidy endpoint, we ensure that initialize has completed (which has a write lock on auto-tidy)
|
||||||
|
_, err = client.Logical().Read("/pki/config/auto-tidy")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since publish_stored_certificate_count_metrics is still false, these metrics should still not exist yet
|
||||||
|
mostRecentInterval = inmemSink.Data()[len(inmemSink.Data())-1]
|
||||||
|
_, ok = mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_revoked_certificates_stored"]
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but revoked cert count was emitted as a metric in an unconfigured mount")
|
||||||
|
}
|
||||||
|
_, ok = mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_certificates_stored"]
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Certificate counting should be off by default, but total certificate count was emitted as a metric in an unconfigured mount")
|
||||||
|
}
|
||||||
|
|
||||||
|
// But since certificate counting is on, the metrics should exist on tidyStatus endpoint:
|
||||||
|
tidyStatus, err = client.Logical().Read("pki/tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// backendUUID should exist, we need this for metrics
|
||||||
|
backendUUID = tidyStatus.Data["internal_backend_uuid"].(string)
|
||||||
|
// "current_cert_store_count", "current_revoked_cert_count"
|
||||||
|
certStoreCount, ok := tidyStatus.Data["current_cert_store_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but current cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if certStoreCount != json.Number("1") {
|
||||||
|
t.Fatalf("Only created one certificate, but a got a certificate count of %v", certStoreCount)
|
||||||
|
}
|
||||||
|
revokedCertCount, ok := tidyStatus.Data["current_revoked_cert_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but revoked cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if revokedCertCount != json.Number("0") {
|
||||||
|
t.Fatalf("Have not yet revoked a certificate, but got a revoked cert store count of %v", revokedCertCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the auto-tidy config, again, this time turning on metrics
|
||||||
|
_, err = client.Logical().Write("pki/config/auto-tidy", map[string]interface{}{
|
||||||
|
"enabled": true,
|
||||||
|
"interval_duration": "1s",
|
||||||
|
"tidy_cert_store": true,
|
||||||
|
"tidy_revoked_certs": true,
|
||||||
|
"safety_buffer": "1s",
|
||||||
|
"maintain_stored_certificate_counts": true,
|
||||||
|
"publish_stored_certificate_count_metrics": true,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Issue a cert and revoke it.
|
||||||
|
resp, err = client.Logical().Write("pki/issue/local-testing", map[string]interface{}{
|
||||||
|
"common_name": "example.com",
|
||||||
|
"ttl": "10s",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp)
|
||||||
|
require.NotNil(t, resp.Data)
|
||||||
|
require.NotEmpty(t, resp.Data["serial_number"])
|
||||||
|
require.NotEmpty(t, resp.Data["certificate"])
|
||||||
|
leafSerial := resp.Data["serial_number"].(string)
|
||||||
|
leafCert := parseCert(t, resp.Data["certificate"].(string))
|
||||||
|
|
||||||
|
// Read cert before revoking
|
||||||
|
resp, err = client.Logical().Read("pki/cert/" + leafSerial)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp)
|
||||||
|
require.NotNil(t, resp.Data)
|
||||||
|
require.NotEmpty(t, resp.Data["certificate"])
|
||||||
|
revocationTime, err := (resp.Data["revocation_time"].(json.Number)).Int64()
|
||||||
|
require.Equal(t, int64(0), revocationTime, "revocation time was not zero")
|
||||||
|
require.Empty(t, resp.Data["revocation_time_rfc3339"], "revocation_time_rfc3339 was not empty")
|
||||||
|
require.Empty(t, resp.Data["issuer_id"], "issuer_id was not empty")
|
||||||
|
|
||||||
|
_, err = client.Logical().Write("pki/revoke", map[string]interface{}{
|
||||||
|
"serial_number": leafSerial,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// We read the auto-tidy endpoint again, to ensure any metrics logic has completed (lock on config)
|
||||||
|
_, err = client.Logical().Read("/pki/config/auto-tidy")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Metrics After Cert Has Be Created and Revoked
|
||||||
|
tidyStatus, err = client.Logical().Read("pki/tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
backendUUID = tidyStatus.Data["internal_backend_uuid"].(string)
|
||||||
|
certStoreCount, ok = tidyStatus.Data["current_cert_store_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but current cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if certStoreCount != json.Number("2") {
|
||||||
|
t.Fatalf("Created root and leaf certificate, but a got a certificate count of %v", certStoreCount)
|
||||||
|
}
|
||||||
|
revokedCertCount, ok = tidyStatus.Data["current_revoked_cert_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but revoked cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if revokedCertCount != json.Number("1") {
|
||||||
|
t.Fatalf("Revoked one certificate, but got a revoked cert store count of %v", revokedCertCount)
|
||||||
|
}
|
||||||
|
// This should now be initialized
|
||||||
|
certCountError, ok := tidyStatus.Data["certificate_counting_error"]
|
||||||
|
if ok && certCountError.(string) != "" {
|
||||||
|
t.Fatalf("Expected certificate count error to disappear after initialization, but got error %v", certCountError)
|
||||||
|
}
|
||||||
|
|
||||||
|
testhelpers.RetryUntil(t, newPeriod*5, func() error {
|
||||||
|
mostRecentInterval = inmemSink.Data()[len(inmemSink.Data())-1]
|
||||||
|
revokedCertCountGaugeValue, ok := mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_revoked_certificates_stored"]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("turned on metrics, but revoked cert count was not emitted")
|
||||||
|
}
|
||||||
|
if revokedCertCountGaugeValue.Value != 1 {
|
||||||
|
return fmt.Errorf("revoked one certificate, but metrics emitted a revoked cert store count of %v", revokedCertCountGaugeValue)
|
||||||
|
}
|
||||||
|
certStoreCountGaugeValue, ok := mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_certificates_stored"]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("turned on metrics, but total certificate count was not emitted")
|
||||||
|
}
|
||||||
|
if certStoreCountGaugeValue.Value != 2 {
|
||||||
|
return fmt.Errorf("stored two certificiates, but total certificate count emitted was %v", certStoreCountGaugeValue.Value)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// Wait for cert to expire and the safety buffer to elapse.
|
||||||
|
time.Sleep(time.Until(leafCert.NotAfter) + 3*time.Second)
|
||||||
|
|
||||||
|
// Wait for auto-tidy to run afterwards.
|
||||||
|
var foundTidyRunning string
|
||||||
|
var foundTidyFinished bool
|
||||||
|
timeoutChan := time.After(120 * time.Second)
|
||||||
|
for {
|
||||||
|
if foundTidyRunning != "" && foundTidyFinished {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-timeoutChan:
|
||||||
|
t.Fatalf("expected auto-tidy to run (%v) and finish (%v) before 120 seconds elapsed", foundTidyRunning, foundTidyFinished)
|
||||||
|
default:
|
||||||
|
time.Sleep(250 * time.Millisecond)
|
||||||
|
|
||||||
|
resp, err = client.Logical().Read("pki/tidy-status")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp)
|
||||||
|
require.NotNil(t, resp.Data)
|
||||||
|
require.NotEmpty(t, resp.Data["state"])
|
||||||
|
require.NotEmpty(t, resp.Data["time_started"])
|
||||||
|
state := resp.Data["state"].(string)
|
||||||
|
started := resp.Data["time_started"].(string)
|
||||||
|
t.Logf("Resp: %v", resp.Data)
|
||||||
|
|
||||||
|
// We want the _next_ tidy run after the cert expires. This
|
||||||
|
// means if we're currently finished when we hit this the
|
||||||
|
// first time, we want to wait for the next run.
|
||||||
|
if foundTidyRunning == "" {
|
||||||
|
foundTidyRunning = started
|
||||||
|
} else if foundTidyRunning != started && !foundTidyFinished && state == "Finished" {
|
||||||
|
foundTidyFinished = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// After Tidy, Cert Store Count Should Still Be Available, and Be Updated:
|
||||||
|
// Check Metrics After Cert Has Be Created and Revoked
|
||||||
|
tidyStatus, err = client.Logical().Read("pki/tidy-status")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
backendUUID = tidyStatus.Data["internal_backend_uuid"].(string)
|
||||||
|
// "current_cert_store_count", "current_revoked_cert_count"
|
||||||
|
certStoreCount, ok = tidyStatus.Data["current_cert_store_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but current cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if certStoreCount != json.Number("1") {
|
||||||
|
t.Fatalf("Created root and leaf certificate, deleted leaf, but a got a certificate count of %v", certStoreCount)
|
||||||
|
}
|
||||||
|
revokedCertCount, ok = tidyStatus.Data["current_revoked_cert_count"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Certificate counting has been turned on, but revoked cert store count does not appear in tidy status")
|
||||||
|
}
|
||||||
|
if revokedCertCount != json.Number("0") {
|
||||||
|
t.Fatalf("Revoked certificate has been tidied, but got a revoked cert store count of %v", revokedCertCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
testhelpers.RetryUntil(t, newPeriod*5, func() error {
|
||||||
|
mostRecentInterval = inmemSink.Data()[len(inmemSink.Data())-1]
|
||||||
|
revokedCertCountGaugeValue, ok := mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_revoked_certificates_stored"]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("turned on metrics, but revoked cert count was not emitted")
|
||||||
|
}
|
||||||
|
if revokedCertCountGaugeValue.Value != 0 {
|
||||||
|
return fmt.Errorf("revoked certificate has been tidied, but metrics emitted a revoked cert store count of %v", revokedCertCountGaugeValue)
|
||||||
|
}
|
||||||
|
certStoreCountGaugeValue, ok := mostRecentInterval.Gauges["secrets.pki."+backendUUID+".total_certificates_stored"]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("turned on metrics, but total certificate count was not emitted")
|
||||||
|
}
|
||||||
|
if certStoreCountGaugeValue.Value != 1 {
|
||||||
|
return fmt.Errorf("only one of two certificates left after tidy, but total certificate count emitted was %v", certStoreCountGaugeValue.Value)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -1351,7 +1351,33 @@ func (sc *storageContext) writeAutoTidyConfig(config *tidyConfig) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return sc.Storage.Put(sc.Context, entry)
|
err = sc.Storage.Put(sc.Context, entry)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sc.Backend.publishCertCountMetrics.Store(config.PublishMetrics)
|
||||||
|
|
||||||
|
// To Potentially Disable Certificate Counting
|
||||||
|
if config.MaintainCount == false {
|
||||||
|
certCountWasEnabled := sc.Backend.certCountEnabled.Swap(config.MaintainCount)
|
||||||
|
if certCountWasEnabled {
|
||||||
|
sc.Backend.certsCounted.Store(true)
|
||||||
|
sc.Backend.certCountError = "Cert Count is Disabled: enable via Tidy Config maintain_stored_certificate_counts"
|
||||||
|
sc.Backend.possibleDoubleCountedSerials = nil // This won't stop a list operation, but will stop an expensive clean-up during initialize
|
||||||
|
sc.Backend.possibleDoubleCountedRevokedSerials = nil // This won't stop a list operation, but will stop an expensive clean-up during initialize
|
||||||
|
sc.Backend.certCount.Store(0)
|
||||||
|
sc.Backend.revokedCertCount.Store(0)
|
||||||
|
}
|
||||||
|
} else { // To Potentially Enable Certificate Counting
|
||||||
|
if sc.Backend.certCountEnabled.Load() == false {
|
||||||
|
// We haven't written "re-enable certificate counts" outside the initialize function
|
||||||
|
// Any call derived call to do so is likely to time out on ~2 million certs
|
||||||
|
sc.Backend.certCountError = "Certificate Counting Has Not Been Initialized, re-initialize this mount"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sc *storageContext) listRevokedCerts() ([]string, error) {
|
func (sc *storageContext) listRevokedCerts() ([]string, error) {
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
```release-note:breaking-change
|
||||||
|
secrets/pki: Maintaining running count of certificates will be turned off by default.
|
||||||
|
To re-enable keeping these metrics available on the tidy status endpoint, enable
|
||||||
|
maintain_stored_certificate_counts on tidy-config, to also publish them to the
|
||||||
|
metrics consumer, enable publish_stored_certificate_count_metrics .
|
||||||
|
```
|
Loading…
Reference in New Issue