Enable periodic, automatic rebuilding of CRLs (#16762)
* Allow automatic rebuilding of CRLs

  When enabled, periodic rebuilding of CRLs improves PKI mounts in two ways:

  1. Reduced load during periods of high (new) revocations, as the CRL isn't rebuilt after each revocation but instead on a fixed schedule.
  2. Ensuring the CRL is never stale as long as the cluster remains up, by checking for the next CRL expiry and regenerating CRLs before that happens. This may increase cluster load when operators have large CRLs that they'd prefer to let go stale rather than regenerate fresh copies.

  In particular, we set a grace period before expiration of CRLs where, when the periodic function triggers (about once a minute), we check upcoming CRL expirations and decide whether the CRLs need to be rebuilt.

* Add changelog entry

* Add documentation on periodic rebuilding

* Allow modification of rollback period for testing

  When testing backends that use the periodic func, and specifically the behavior of that periodic func, waiting for the usual 1m interval can lead to excessively long test execution. By switching to a shorter period--strictly for testing--we can make these tests execute faster.

* Add tests for auto-rebuilding of CRLs

* Remove non-updating getConfig variant

* Avoid double reload of config

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
This commit is contained in:
parent e9e3b4995b
commit cacb23bda6
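In short, the periodic check compares each stored CRL expiry against the configured grace period and schedules a rebuild once any CRL falls within that window. A minimal sketch of that comparison (the helper name below is hypothetical; the actual logic lives in `crlBuilder.checkForAutoRebuild` in the diff that follows):

```go
package main

import (
	"fmt"
	"time"
)

// shouldRebuild is a simplified, hypothetical illustration of the grace-period
// check described above: once the current time falls within gracePeriod of a
// CRL's NextUpdate (or the stored expiry is the zero time), a rebuild is due.
func shouldRebuild(expiries []time.Time, gracePeriod time.Duration, now time.Time) bool {
	for _, expiry := range expiries {
		if expiry.IsZero() || now.After(expiry.Add(-gracePeriod)) {
			return true
		}
	}
	return false
}

func main() {
	// Example: a CRL expiring in 10h with a 12h grace period is already due for rebuild.
	expiries := []time.Time{time.Now().Add(10 * time.Hour)}
	fmt.Println(shouldRebuild(expiries, 12*time.Hour, time.Now())) // true
}
```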
@@ -8,8 +8,6 @@ import (
    "sync/atomic"
    "time"

    atomic2 "go.uber.org/atomic"

    "github.com/hashicorp/vault/sdk/helper/consts"

    "github.com/armon/go-metrics"

@@ -178,7 +176,6 @@ func Backend(conf *logical.BackendConfig) *backend {
        PeriodicFunc: b.periodicFunc,
    }

    b.crlLifetime = time.Hour * 72
    b.tidyCASGuard = new(uint32)
    b.tidyStatus = &tidyStatus{state: tidyStatusInactive}
    b.storage = conf.StorageView

@@ -186,8 +183,8 @@ func Backend(conf *logical.BackendConfig) *backend {

    b.pkiStorageVersion.Store(0)

    b.crlBuilder = &crlBuilder{}
    b.isOcspDisabled = atomic2.NewBool(false)
    b.crlBuilder = newCRLBuilder()

    return &b
}

@@ -208,9 +205,6 @@ type backend struct {

    // Write lock around issuers and keys.
    issuersLock sync.RWMutex

    // Optimization to not read the CRL config on every OCSP request.
    isOcspDisabled *atomic2.Bool
}

type (

@@ -307,8 +301,10 @@ func (b *backend) metricsWrap(callType string, roleMode int, ofunc roleOperation

// initialize is used to perform a possible PKI storage migration if needed
func (b *backend) initialize(ctx context.Context, _ *logical.InitializationRequest) error {
    // load ocsp enabled status
    setOcspStatus(b, ctx)
    sc := b.makeStorageContext(ctx, b.storage)
    if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
        return err
    }

    // Grab the lock prior to the updating of the storage lock preventing us flipping
    // the storage flag midway through the request stream of other requests.

@@ -335,14 +331,6 @@ func (b *backend) initialize(ctx context.Context, _ *logical.InitializationReque
    return nil
}

func setOcspStatus(b *backend, ctx context.Context) {
    sc := b.makeStorageContext(ctx, b.storage)
    config, err := sc.getRevocationConfig()
    if config != nil && err == nil {
        b.isOcspDisabled.Store(config.OcspDisable)
    }
}

func (b *backend) useLegacyBundleCaStorage() bool {
    // This helper function is here to choose whether or not we use the newer
    // issuer/key storage format or the older legacy ca bundle format.

@@ -398,10 +386,27 @@ func (b *backend) invalidate(ctx context.Context, key string) {
        }
    case key == "config/crl":
        // We may need to reload our OCSP status flag
        setOcspStatus(b, ctx)
        b.crlBuilder.markConfigDirty()
    }
}

func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) error {
    return b.crlBuilder.rebuildIfForced(ctx, b, request)
    // First attempt to reload the CRL configuration.
    sc := b.makeStorageContext(ctx, request.Storage)
    if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
        return err
    }

    // Check if we're set to auto rebuild and a CRL is set to expire.
    if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil {
        return err
    }

    // Then attempt to rebuild the CRLs if required.
    if err := b.crlBuilder.rebuildIfForced(ctx, b, request); err != nil {
        return err
    }

    // All good!
    return nil
}
@@ -8,7 +8,11 @@ import (
    "testing"
    "time"

    "github.com/hashicorp/vault/api"
    vaulthttp "github.com/hashicorp/vault/http"
    "github.com/hashicorp/vault/sdk/logical"
    "github.com/hashicorp/vault/vault"

    "github.com/stretchr/testify/require"
)

@@ -363,6 +367,8 @@ func TestCrlRebuilder(t *testing.T) {
}

func TestBYOC(t *testing.T) {
    t.Parallel()

    b, s := createBackendWithStorage(t)

    // Create a root CA.

@@ -480,6 +486,8 @@ func TestBYOC(t *testing.T) {
}

func TestPoP(t *testing.T) {
    t.Parallel()

    b, s := createBackendWithStorage(t)

    // Create a root CA.

@@ -638,6 +646,8 @@ func TestPoP(t *testing.T) {
}

func TestIssuerRevocation(t *testing.T) {
    t.Parallel()

    b, s := createBackendWithStorage(t)

    // Create a root CA.

@@ -778,6 +788,167 @@ func TestIssuerRevocation(t *testing.T) {
    require.NotEmpty(t, resp.Data["revocation_time"])
}

func TestAutoRebuild(t *testing.T) {
    t.Parallel()

    // While we'd like to reduce this duration, we need to wait until
    // the rollback manager timer ticks. With the new helper, we can
    // modify the rollback manager timer period directly, allowing us
    // to shorten the total test time significantly.
    newPeriod := 6 * time.Second
    gracePeriod := (newPeriod + 1*time.Second).String()
    crlTime := (newPeriod + 2*time.Second).String()
    delta := 2 * newPeriod

    // This test requires the periodicFunc to trigger, which requires we stand
    // up a full test cluster.
    coreConfig := &vault.CoreConfig{
        LogicalBackends: map[string]logical.Factory{
            "pki": Factory,
        },
    }
    oldPeriod := vault.SetRollbackPeriodForTesting(newPeriod)
    cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{
        HandlerFunc: vaulthttp.Handler,
    })
    cluster.Start()
    defer cluster.Cleanup()
    client := cluster.Cores[0].Client
    vault.SetRollbackPeriodForTesting(oldPeriod)

    // Mount PKI
    err := client.Sys().Mount("pki", &api.MountInput{
        Type: "pki",
        Config: api.MountConfigInput{
            DefaultLeaseTTL: "16h",
            MaxLeaseTTL:     "60h",
        },
    })
    require.NoError(t, err)

    // Generate root.
    _, err = client.Logical().Write("pki/root/generate/internal", map[string]interface{}{
        "ttl":         "40h",
        "common_name": "Root X1",
        "key_type":    "ec",
    })
    require.NoError(t, err)

    // Setup a testing role.
    _, err = client.Logical().Write("pki/roles/local-testing", map[string]interface{}{
        "allow_any_name":    true,
        "enforce_hostnames": false,
        "key_type":          "ec",
    })
    require.NoError(t, err)

    // Safety guard: we play with rebuild timing below. We don't expect
    // this entire test to take longer than 80s.
    _, err = client.Logical().Write("pki/config/crl", map[string]interface{}{
        "expiry": crlTime,
    })
    require.NoError(t, err)

    // Issue a cert and revoke it. It should appear on the CRL right away.
    resp, err := client.Logical().Write("pki/issue/local-testing", map[string]interface{}{
        "common_name": "example.com",
    })
    require.NoError(t, err)
    require.NotNil(t, resp)
    require.NotNil(t, resp.Data)
    require.NotEmpty(t, resp.Data["serial_number"])
    leafSerial := resp.Data["serial_number"].(string)

    _, err = client.Logical().Write("pki/revoke", map[string]interface{}{
        "serial_number": leafSerial,
    })
    require.NoError(t, err)

    crl := getCrlCertificateList(t, client, "pki")
    lastCRLNumber := crl.Version
    lastCRLExpiry := crl.NextUpdate
    requireSerialNumberInCRL(t, crl, leafSerial)

    // Enable periodic rebuild of the CRL.
    _, err = client.Logical().Write("pki/config/crl", map[string]interface{}{
        "expiry":                    crlTime,
        "auto_rebuild":              true,
        "auto_rebuild_grace_period": gracePeriod,
    })
    require.NoError(t, err)

    // Issue a cert and revoke it.
    resp, err = client.Logical().Write("pki/issue/local-testing", map[string]interface{}{
        "common_name": "example.com",
    })
    require.NoError(t, err)
    require.NotNil(t, resp)
    require.NotNil(t, resp.Data)
    require.NotEmpty(t, resp.Data["serial_number"])
    newLeafSerial := resp.Data["serial_number"].(string)

    _, err = client.Logical().Write("pki/revoke", map[string]interface{}{
        "serial_number": newLeafSerial,
    })
    require.NoError(t, err)

    // Serial should not appear on CRL.
    crl = getCrlCertificateList(t, client, "pki")
    thisCRLNumber := crl.Version
    requireSerialNumberInCRL(t, crl, leafSerial)

    now := time.Now()
    graceInterval, _ := time.ParseDuration(gracePeriod)
    expectedUpdate := lastCRLExpiry.Add(-1 * graceInterval)
    if requireSerialNumberInCRL(nil, crl, newLeafSerial) {
        // If we somehow lagged and we ended up needing to rebuild
        // the CRL, we should avoid throwing an error.

        if thisCRLNumber == lastCRLNumber {
            t.Fatalf("unexpected failure: last (%v) and current (%v) leaf certificate might have the same serial number?", leafSerial, newLeafSerial)
        }

        if !now.After(expectedUpdate) {
            t.Fatalf("expected newly generated certificate with serial %v not to appear on this CRL but it did, prematurely: %v", newLeafSerial, crl)
        }

        t.Fatalf("shouldn't be here")
    }

    // Now, wait until we're within the grace period... Then start prompting
    // for regeneration.
    if expectedUpdate.After(now) {
        time.Sleep(expectedUpdate.Sub(now))
    }

    // Otherwise, the absolute latest we're willing to wait is some delta
    // after CRL expiry (to let stuff regenerate &c).
    interruptChan := time.After(lastCRLExpiry.Sub(now) + delta)
    for {
        select {
        case <-interruptChan:
            t.Fatalf("expected CRL to regenerate prior to CRL expiry (plus %v grace period)", delta)
        default:
            crl = getCrlCertificateList(t, client, "pki")
            if crl.NextUpdate.Equal(lastCRLExpiry) {
                // Hack to ensure we got a net-new CRL. If we didn't, we can
                // exit this default conditional and wait for the next
                // go-round. When the timer fires, it'll populate the channel
                // and we'll exit correctly.
                time.Sleep(1 * time.Second)
                break
            }

            now := time.Now()
            require.True(t, crl.ThisUpdate.Before(now))
            require.True(t, crl.NextUpdate.After(now))
            requireSerialNumberInCRL(t, crl, leafSerial)
            requireSerialNumberInCRL(t, crl, newLeafSerial)
            return
        }
    }
}

func requestCrlFromBackend(t *testing.T, s logical.Storage, b *backend) *logical.Response {
    crlReq := &logical.Request{
        Operation: logical.ReadOperation,
@@ -13,6 +13,8 @@ import (
    "sync/atomic"
    "time"

    atomic2 "go.uber.org/atomic"

    "github.com/hashicorp/vault/sdk/helper/consts"

    "github.com/hashicorp/vault/sdk/helper/errutil"

@@ -33,9 +35,15 @@ type revocationInfo struct {
// without the actual API calls. During the storage invalidation process, we do not have the required state
// to actually rebuild the CRLs, so we need to schedule it in a deferred fashion. This allows either
// read or write calls to perform the operation if required, or have the flag reset upon a write operation
//
// The CRL builder also tracks the revocation configuration.
type crlBuilder struct {
    m            sync.Mutex
    forceRebuild uint32

    c      sync.RWMutex
    dirty  *atomic2.Bool
    config crlConfig
}

const (

@@ -43,6 +51,126 @@ const (
    _enforceForceFlag = false
)

func newCRLBuilder() *crlBuilder {
    return &crlBuilder{
        dirty:  atomic2.NewBool(true),
        config: defaultCrlConfig,
    }
}

func (cb *crlBuilder) markConfigDirty() {
    cb.dirty.Store(true)
}

func (cb *crlBuilder) reloadConfigIfRequired(sc *storageContext) error {
    if cb.dirty.Load() {
        // Acquire a write lock.
        cb.c.Lock()
        defer cb.c.Unlock()

        if !cb.dirty.Load() {
            // Someone else might've been reloading the config; no need
            // to do it twice.
            return nil
        }

        config, err := sc.getRevocationConfig()
        if err != nil {
            return err
        }

        // Set the default config if none was returned to us.
        if config != nil {
            cb.config = *config
        } else {
            cb.config = defaultCrlConfig
        }

        // Updated the config; unset dirty.
        cb.dirty.Store(false)
    }

    return nil
}

func (cb *crlBuilder) getConfigWithUpdate(sc *storageContext) (*crlConfig, error) {
    // Config may mutate immediately after accessing, but will be freshly
    // fetched if necessary.
    if err := cb.reloadConfigIfRequired(sc); err != nil {
        return nil, err
    }

    cb.c.RLock()
    defer cb.c.RUnlock()

    configCopy := cb.config
    return &configCopy, nil
}

func (cb *crlBuilder) checkForAutoRebuild(sc *storageContext) error {
    cfg, err := cb.getConfigWithUpdate(sc)
    if err != nil {
        return err
    }

    if cfg.Disable || !cfg.AutoRebuild || atomic.LoadUint32(&cb.forceRebuild) == 1 {
        // Not enabled, not on auto-rebuilder, or we're already scheduled to
        // rebuild so there's no point to interrogate CRL values...
        return nil
    }

    // Auto-Rebuild is enabled. We need to check each issuer's CRL and see
    // if its about to expire. If it is, we've gotta rebuild it (and well,
    // every other CRL since we don't have a fine-toothed rebuilder).
    //
    // We store a list of all (unique) CRLs in the cluster-local CRL
    // configuration along with their expiration dates.
    crlConfig, err := sc.getLocalCRLConfig()
    if err != nil {
        return fmt.Errorf("error checking for auto-rebuild status: unable to fetch cluster-local CRL configuration: %v", err)
    }

    // If there's no config, assume we've gotta rebuild it to get this
    // information.
    if crlConfig == nil {
        atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
        return nil
    }

    // If the map is empty, assume we need to upgrade and schedule a
    // rebuild.
    if len(crlConfig.CRLExpirationMap) == 0 {
        atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
        return nil
    }

    // Otherwise, check CRL's expirations and see if its zero or within
    // the grace period and act accordingly.
    now := time.Now()

    period, err := time.ParseDuration(cfg.AutoRebuildGracePeriod)
    if err != nil {
        // This may occur if the duration is empty; in that case
        // assume the default. The default should be valid and shouldn't
        // error.
        defaultPeriod, defaultErr := time.ParseDuration(defaultCrlConfig.AutoRebuildGracePeriod)
        if defaultErr != nil {
            return fmt.Errorf("error checking for auto-rebuild status: unable to parse duration from both config's grace period (%v) and default grace period (%v):\n- config: %v\n- default: %v\n", cfg.AutoRebuildGracePeriod, defaultCrlConfig.AutoRebuildGracePeriod, err, defaultErr)
        }

        period = defaultPeriod
    }

    for _, value := range crlConfig.CRLExpirationMap {
        if value.IsZero() || now.After(value.Add(-1*period)) {
            atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
            return nil
        }
    }

    return nil
}

// rebuildIfForced is to be called by readers or periodic functions that might need to trigger
// a refresh of the CRL before the read occurs.
func (cb *crlBuilder) rebuildIfForced(ctx context.Context, b *backend, request *logical.Request) error {

@@ -235,13 +363,23 @@ func revokeCert(ctx context.Context, b *backend, req *logical.Request, serial st
        }
    }

    crlErr := b.crlBuilder.rebuild(ctx, b, req, false)
    if crlErr != nil {
        switch crlErr.(type) {
        case errutil.UserError:
            return logical.ErrorResponse(fmt.Sprintf("Error during CRL building: %s", crlErr)), nil
        default:
            return nil, fmt.Errorf("error encountered during CRL building: %w", crlErr)
    // Fetch the config and see if we need to rebuild the CRL. If we have
    // auto building enabled, we will wait for the next rebuild period to
    // actually rebuild it.
    config, err := b.crlBuilder.getConfigWithUpdate(sc)
    if err != nil {
        return nil, fmt.Errorf("error building CRL: while updating config: %v", err)
    }

    if !config.AutoRebuild {
        crlErr := b.crlBuilder.rebuild(ctx, b, req, false)
        if crlErr != nil {
            switch crlErr.(type) {
            case errutil.UserError:
                return logical.ErrorResponse(fmt.Sprintf("Error during CRL building: %s", crlErr)), nil
            default:
                return nil, fmt.Errorf("error encountered during CRL building: %w", crlErr)
            }
        }
    }

@@ -288,6 +426,13 @@ func buildCRLs(ctx context.Context, b *backend, req *logical.Request, forceNew b
    var wasLegacy bool
    sc := b.makeStorageContext(ctx, req.Storage)

    // First, fetch an updated copy of the CRL config. We'll pass this into
    // buildCRL.
    globalCRLConfig, err := b.crlBuilder.getConfigWithUpdate(sc)
    if err != nil {
        return fmt.Errorf("error building CRL: while updating config: %v", err)
    }

    if !b.useLegacyBundleCaStorage() {
        issuers, err = sc.listIssuers()
        if err != nil {

@@ -420,9 +565,12 @@ func buildCRLs(ctx context.Context, b *backend, req *logical.Request, forceNew b
            crlConfig.CRLNumberMap[crlIdentifier] += 1

            // Lastly, build the CRL.
            if err := buildCRL(sc, forceNew, representative, revokedCerts, crlIdentifier, crlNumber); err != nil {
            nextUpdate, err := buildCRL(sc, globalCRLConfig, forceNew, representative, revokedCerts, crlIdentifier, crlNumber)
            if err != nil {
                return fmt.Errorf("error building CRLs: unable to build CRL for issuer (%v): %v", representative, err)
            }

            crlConfig.CRLExpirationMap[crlIdentifier] = *nextUpdate
        }
    }

@@ -610,26 +758,19 @@ func augmentWithRevokedIssuers(issuerIDEntryMap map[issuerID]*issuerEntry, issue

// Builds a CRL by going through the list of revoked certificates and building
// a new CRL with the stored revocation times and serial numbers.
func buildCRL(sc *storageContext, forceNew bool, thisIssuerId issuerID, revoked []pkix.RevokedCertificate, identifier crlID, crlNumber int64) error {
    crlInfo, err := sc.getRevocationConfig()
    if err != nil {
        return errutil.InternalError{Err: fmt.Sprintf("error fetching CRL config information: %s", err)}
    }

    crlLifetime := sc.Backend.crlLifetime
func buildCRL(sc *storageContext, crlInfo *crlConfig, forceNew bool, thisIssuerId issuerID, revoked []pkix.RevokedCertificate, identifier crlID, crlNumber int64) (*time.Time, error) {
    var revokedCerts []pkix.RevokedCertificate

    if crlInfo.Expiry != "" {
        crlDur, err := time.ParseDuration(crlInfo.Expiry)
        if err != nil {
            return errutil.InternalError{Err: fmt.Sprintf("error parsing CRL duration of %s", crlInfo.Expiry)}
        }
        crlLifetime = crlDur
    crlLifetime, err := time.ParseDuration(crlInfo.Expiry)
    if err != nil {
        return nil, errutil.InternalError{Err: fmt.Sprintf("error parsing CRL duration of %s", crlInfo.Expiry)}
    }

    if crlInfo.Disable {
        if !forceNew {
            return nil
            // In the event of a disabled CRL, we'll have the next time set
            // to the zero time as a sentinel in case we get re-enabled.
            return &time.Time{}, nil
        }

        // NOTE: in this case, the passed argument (revoked) is not added

@@ -648,23 +789,26 @@ WRITE:
    if caErr != nil {
        switch caErr.(type) {
        case errutil.UserError:
            return errutil.UserError{Err: fmt.Sprintf("could not fetch the CA certificate: %s", caErr)}
            return nil, errutil.UserError{Err: fmt.Sprintf("could not fetch the CA certificate: %s", caErr)}
        default:
            return errutil.InternalError{Err: fmt.Sprintf("error fetching CA certificate: %s", caErr)}
            return nil, errutil.InternalError{Err: fmt.Sprintf("error fetching CA certificate: %s", caErr)}
        }
    }

    now := time.Now()
    nextUpdate := now.Add(crlLifetime)

    revocationListTemplate := &x509.RevocationList{
        RevokedCertificates: revokedCerts,
        Number:              big.NewInt(crlNumber),
        ThisUpdate:          time.Now(),
        NextUpdate:          time.Now().Add(crlLifetime),
        ThisUpdate:          now,
        NextUpdate:          nextUpdate,
        SignatureAlgorithm:  signingBundle.RevocationSigAlg,
    }

    crlBytes, err := x509.CreateRevocationList(rand.Reader, revocationListTemplate, signingBundle.Certificate, signingBundle.PrivateKey)
    if err != nil {
        return errutil.InternalError{Err: fmt.Sprintf("error creating new CRL: %s", err)}
        return nil, errutil.InternalError{Err: fmt.Sprintf("error creating new CRL: %s", err)}
    }

    writePath := "crls/" + identifier.String()

@@ -679,8 +823,8 @@ WRITE:
        Value: crlBytes,
    })
    if err != nil {
        return errutil.InternalError{Err: fmt.Sprintf("error storing CRL: %s", err)}
        return nil, errutil.InternalError{Err: fmt.Sprintf("error storing CRL: %s", err)}
    }

    return nil
    return &nextUpdate, nil
}
@@ -100,7 +100,9 @@ func buildPathOcspPost(b *backend) *framework.Path {
}

func (b *backend) ocspHandler(ctx context.Context, request *logical.Request, data *framework.FieldData) (*logical.Response, error) {
    if b.isOcspDisabled.Load() {
    sc := b.makeStorageContext(ctx, request.Storage)
    cfg, err := b.crlBuilder.getConfigWithUpdate(sc)
    if err != nil || cfg.OcspDisable {
        return OcspUnauthorizedResponse, nil
    }

@@ -114,8 +116,6 @@ func (b *backend) ocspHandler(ctx context.Context, request *logical.Request, dat
        return OcspMalformedResponse, nil
    }

    sc := b.makeStorageContext(ctx, request.Storage)

    ocspStatus, err := getOcspStatus(sc, request, ocspReq)
    if err != nil {
        return logAndReturnInternalError(b, err), nil
@@ -12,9 +12,20 @@ import (

// CRLConfig holds basic CRL configuration information
type crlConfig struct {
    Expiry      string `json:"expiry"`
    Disable     bool   `json:"disable"`
    OcspDisable bool   `json:"ocsp_disable"`
    Expiry                 string `json:"expiry"`
    Disable                bool   `json:"disable"`
    OcspDisable            bool   `json:"ocsp_disable"`
    AutoRebuild            bool   `json:"auto_rebuild"`
    AutoRebuildGracePeriod string `json:"auto_rebuild_grace_period"`
}

// Implicit default values for the config if it does not exist.
var defaultCrlConfig = crlConfig{
    Expiry:                 "72h",
    Disable:                false,
    OcspDisable:            false,
    AutoRebuild:            false,
    AutoRebuildGracePeriod: "12h",
}

func pathConfigCRL(b *backend) *framework.Path {

@@ -35,6 +46,15 @@ valid; defaults to 72 hours`,
            Type:        framework.TypeBool,
            Description: `If set to true, ocsp unauthorized responses will be returned.`,
        },
        "auto_rebuild": {
            Type:        framework.TypeBool,
            Description: `If set to true, enables automatic rebuilding of the CRL`,
        },
        "auto_rebuild_grace_period": {
            Type:        framework.TypeString,
            Description: `The time before the CRL expires to automatically rebuild it, when enabled. Must be shorter than the CRL expiry. Defaults to 12h.`,
            Default:     "12h",
        },
    },

    Operations: map[logical.Operation]framework.OperationHandler{

@@ -63,9 +83,11 @@ func (b *backend) pathCRLRead(ctx context.Context, req *logical.Request, _ *fram

    return &logical.Response{
        Data: map[string]interface{}{
            "expiry":       config.Expiry,
            "disable":      config.Disable,
            "ocsp_disable": config.OcspDisable,
            "expiry":                    config.Expiry,
            "disable":                   config.Disable,
            "ocsp_disable":              config.OcspDisable,
            "auto_rebuild":              config.AutoRebuild,
            "auto_rebuild_grace_period": config.AutoRebuildGracePeriod,
        },
    }, nil
}

@@ -86,18 +108,37 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
        config.Expiry = expiry
    }

    var oldDisable bool
    oldDisable := config.Disable
    if disableRaw, ok := d.GetOk("disable"); ok {
        oldDisable = config.Disable
        config.Disable = disableRaw.(bool)
    }

    var oldOcspDisable bool
    if ocspDisableRaw, ok := d.GetOk("ocsp_disable"); ok {
        oldOcspDisable = config.OcspDisable
        config.OcspDisable = ocspDisableRaw.(bool)
    }

    oldAutoRebuild := config.AutoRebuild
    if autoRebuildRaw, ok := d.GetOk("auto_rebuild"); ok {
        config.AutoRebuild = autoRebuildRaw.(bool)
    }

    if autoRebuildGracePeriodRaw, ok := d.GetOk("auto_rebuild_grace_period"); ok {
        autoRebuildGracePeriod := autoRebuildGracePeriodRaw.(string)
        if _, err := time.ParseDuration(autoRebuildGracePeriod); err != nil {
            return logical.ErrorResponse(fmt.Sprintf("given auto_rebuild_grace_period could not be decoded: %s", err)), nil
        }
        config.AutoRebuildGracePeriod = autoRebuildGracePeriod
    }

    if config.AutoRebuild {
        expiry, _ := time.ParseDuration(config.Expiry)
        gracePeriod, _ := time.ParseDuration(config.AutoRebuildGracePeriod)

        if gracePeriod >= expiry {
            return logical.ErrorResponse(fmt.Sprintf("CRL auto-rebuilding grace period (%v) must be strictly shorter than CRL expiry (%v) value when auto-rebuilding of CRLs is enabled", config.AutoRebuildGracePeriod, config.Expiry)), nil
        }
    }

    entry, err := logical.StorageEntryJSON("config/crl", config)
    if err != nil {
        return nil, err

@@ -107,8 +148,12 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
        return nil, err
    }

    if oldDisable != config.Disable {
        // It wasn't disabled but now it is, rotate
    b.crlBuilder.markConfigDirty()
    b.crlBuilder.reloadConfigIfRequired(sc)

    if oldDisable != config.Disable || (oldAutoRebuild && !config.AutoRebuild) {
        // It wasn't disabled but now it is (or equivalently, we were set to
        // auto-rebuild and we aren't now), so rotate the CRL.
        crlErr := b.crlBuilder.rebuild(ctx, b, req, true)
        if crlErr != nil {
            switch crlErr.(type) {

@@ -120,10 +165,6 @@ func (b *backend) pathCRLWrite(ctx context.Context, req *logical.Request, d *fra
        }
    }

    if oldOcspDisable != config.OcspDisable {
        setOcspStatus(b, ctx)
    }

    return nil, nil
}
@@ -226,9 +226,21 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
    }
    metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries_remaining"}, float32(uint(revokedSerialsCount)-b.tidyStatus.revokedCertDeletedCount))
    if rebuildCRL {
        if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil {
        // Expired certificates isn't generally an important
        // reason to trigger a CRL rebuild for. Check if
        // automatic CRL rebuilds have been enabled and defer
        // the rebuild if so.
        sc := b.makeStorageContext(ctx, req.Storage)
        config, err := sc.getRevocationConfig()
        if err != nil {
            return err
        }

        if !config.AutoRebuild {
            if err := b.crlBuilder.rebuild(ctx, b, req, false); err != nil {
                return err
            }
        }
    }
}
@@ -159,8 +159,9 @@ type issuerEntry struct {
}

type localCRLConfigEntry struct {
    IssuerIDCRLMap map[issuerID]crlID `json:"issuer_id_crl_map"`
    CRLNumberMap   map[crlID]int64    `json:"crl_number_map"`
    IssuerIDCRLMap   map[issuerID]crlID  `json:"issuer_id_crl_map"`
    CRLNumberMap     map[crlID]int64     `json:"crl_number_map"`
    CRLExpirationMap map[crlID]time.Time `json:"crl_expiration_map"`
}

type keyConfigEntry struct {

@@ -803,6 +804,10 @@ func (sc *storageContext) getLocalCRLConfig() (*localCRLConfigEntry, error) {
        mapping.CRLNumberMap = make(map[crlID]int64)
    }

    if len(mapping.CRLExpirationMap) == 0 {
        mapping.CRLExpirationMap = make(map[crlID]time.Time)
    }

    return mapping, nil
}
@@ -0,0 +1,3 @@
```release-note:improvement
secrets/pki: Add ability to periodically rebuild CRL before expiry
```
@@ -14,10 +14,11 @@ import (
    "github.com/hashicorp/vault/sdk/logical"
)

const (
    // rollbackPeriod is how often we attempt rollbacks for all the backends
    rollbackPeriod = time.Minute
)
// rollbackPeriod is how often we attempt rollbacks for all the backends.
//
// This is turned into a variable to allow test to check behavior without
// waiting the full minute. See SetRollbackPeriodForTesting(...).
var rollbackPeriod = time.Minute

// RollbackManager is responsible for performing rollbacks of partial
// secrets within logical backends.
@@ -2347,3 +2347,14 @@ func RetryUntil(t testing.T, timeout time.Duration, f func() error) {
    }
    t.Fatalf("did not complete before deadline, err: %v", err)
}

// SetRollbackPeriodForTesting lets us modify the periodic func invocation
// time period to some other value. Best practice is to set this, spin up
// a test cluster and immediately reset the value back to the default, to
// avoid impacting other tests too much. To that end, we return the original
// value of that period.
func SetRollbackPeriodForTesting(newPeriod time.Duration) time.Duration {
    oldPeriod := rollbackPeriod
    rollbackPeriod = newPeriod
    return oldPeriod
}
@@ -2989,7 +2989,7 @@ $ curl \
}
```

### Set CRL Configuration
### Set Revocation Configuration

This endpoint allows setting the duration for which the generated CRL should be
marked valid. If the CRL is disabled, it will return a signed but zero-length

@@ -3009,6 +3009,13 @@ revoked until `tidy` has been run with the desired safety buffer. Re-enabling
CRL generation will then result in all such certificates becoming a part of
the CRL.

~> Note: Enabling automatic rebuilding of CRLs disables immediate regeneration
on revocation. This is in line with the behavior of other CAs which only
rebuild CRLs periodically. We suggest manually hitting the rotate if a
fresh CRL is necessary after a revocation. For the most part though, CRLs
should not be relied upon for the latest certificate status information,
and OCSP should be used instead.

| Method | Path              |
| :----- | :---------------- |
| `POST` | `/pki/config/crl` |

@@ -3018,6 +3025,10 @@ the CRL.
- `expiry` `(string: "72h")` - The amount of time the generated CRL should be valid.
- `disable` `(bool: false)` - Disables or enables CRL building.
- `ocsp_disable` `(bool: false)` - Disables or enables the OCSP responder in Vault.
- `auto_rebuild` `(bool: false)` - Enables or disables periodic rebuilding of
  the CRL upon expiry.
- `auto_rebuild_grace_period` `(string: "12h")` - Grace period before CRL expiry
  to attempt rebuild of CRL. Must be shorter than the CRL expiry period.

#### Sample Payload

@@ -3025,7 +3036,9 @@ the CRL.
{
  "expiry": "48h",
  "disable": "false",
  "ocsp_disable": "false"
  "ocsp_disable": "false",
  "auto_rebuild": "true",
  "auto_rebuild_grace_period": "8h"
}
```

@@ -3055,6 +3068,14 @@ and **must** be called on every cluster.
to delete their issuer and is instead necessary to **remove** the mount
completely.

~> **Note**: As of Vault 1.12, it is suggested to switch to enabling the CRL's
`auto_rebuild` functionality to avoid having to manually hit the Rotate
endpoint when the CRL expires. This ensures a valid CRL is always
maintained, at the expense of potentially not being up-to-date. If a
revocation occurs that must be immediately propagated, this endpoint can
be used to regenerate the CRL, though distribution must still occur outside
of Vault (either manually or via AIA where supported).

| Method | Path              |
| :----- | :---------------- |
| `GET`  | `/pki/crl/rotate` |