Allow unification of revocations on other clusters (#18873)

* Allow unification of revocations on other clusters

If a BYOC revocation occurred on cluster A, while the cert was initially
issued and stored on cluster B, we need to use the invalidation on the
unified entry to detect this: the revocation queues only work for
non-PoP, non-BYOC serial only revocations and thus this BYOC would be
immediately accepted on cluster A. By checking all other incoming
revocations for duplicates on a given cluster, we can ensure that
unified revocation is consistent across clusters.

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Use time-of-use locking for global revocation processing

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

---------

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
This commit is contained in:
Alexander Scheel 2023-01-27 11:34:04 -05:00 committed by GitHub
parent d12534c2bd
commit a2c84ef236
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 102 additions and 18 deletions

View File

@ -474,6 +474,12 @@ func (b *backend) invalidate(ctx context.Context, key string) {
}
}
}
case strings.HasPrefix(key, unifiedRevocationReadPathPrefix):
// Three parts to this key: prefix, cluster, and serial.
split := strings.Split(key, "/")
cluster := split[len(split)-2]
serial := split[len(split)-1]
b.crlBuilder.addCertFromCrossRevocation(cluster, serial)
}
}
@ -498,6 +504,11 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
return err
}
// Then handle any unified cross-cluster revocations.
if err := b.crlBuilder.processCrossClusterRevocations(sc); err != nil {
return err
}
// Check if we're set to auto rebuild and a CRL is set to expire.
if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil {
return err

View File

@ -102,9 +102,10 @@ type crlBuilder struct {
// Global revocation queue entries get accepted by the invalidate func
// and passed to the crlBuilder for processing.
haveInitializedQueue bool
haveInitializedQueue *atomic2.Bool
revQueue *revocationQueue
removalQueue *revocationQueue
crossQueue *revocationQueue
}
const (
@ -123,8 +124,10 @@ func newCRLBuilder(canRebuild bool) *crlBuilder {
dirty: atomic2.NewBool(true),
config: defaultCrlConfig,
invalidate: atomic2.NewBool(false),
haveInitializedQueue: atomic2.NewBool(false),
revQueue: newRevocationQueue(),
removalQueue: newRevocationQueue(),
crossQueue: newRevocationQueue(),
}
}
@ -175,6 +178,19 @@ func (cb *crlBuilder) reloadConfigIfRequired(sc *storageContext) error {
return nil
}
func (cb *crlBuilder) notifyOnConfigChange(sc *storageContext, priorConfig crlConfig, newConfig crlConfig) {
// If you need to hook into a CRL configuration change across different server types
// such as primary clusters as well as performance replicas, it is easier to do here than
// in two places (API layer and in invalidateFunc)
if priorConfig.UnifiedCRL != newConfig.UnifiedCRL && newConfig.UnifiedCRL {
sc.Backend.unifiedTransferStatus.forceRun()
}
if priorConfig.UseGlobalQueue != newConfig.UseGlobalQueue && newConfig.UseGlobalQueue {
cb.haveInitializedQueue.Store(false)
}
}
func (cb *crlBuilder) getConfigWithUpdate(sc *storageContext) (*crlConfig, error) {
// Config may mutate immediately after accessing, but will be freshly
// fetched if necessary.
@ -568,9 +584,17 @@ func (cb *crlBuilder) addCertForRevocationRemoval(cluster, serial string) {
cb.removalQueue.Add(entry)
}
func (cb *crlBuilder) addCertFromCrossRevocation(cluster, serial string) {
entry := &revocationQueueEntry{
Cluster: cluster,
Serial: serial,
}
cb.crossQueue.Add(entry)
}
func (cb *crlBuilder) maybeGatherQueueForFirstProcess(sc *storageContext, isNotPerfPrimary bool) error {
// Assume holding lock.
if cb.haveInitializedQueue {
if cb.haveInitializedQueue.Load() {
return nil
}
@ -630,14 +654,6 @@ func (cb *crlBuilder) processRevocationQueue(sc *storageContext) error {
isNotPerfPrimary := sc.Backend.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
(!sc.Backend.System().LocalMount() && sc.Backend.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary))
// Before revoking certificates, we need to hold the lock for certificate
// storage. This prevents any parallel revocations and prevents us from
// multiple places. We do this before grabbing the contents of the
// revocation queues themselves, to ensure we interleave well with other
// invocations of this function and avoid duplicate work.
sc.Backend.revokeStorageLock.Lock()
defer sc.Backend.revokeStorageLock.Unlock()
if err := cb.maybeGatherQueueForFirstProcess(sc, isNotPerfPrimary); err != nil {
return fmt.Errorf("failed to gather first queue: %v", err)
}
@ -718,7 +734,7 @@ func (cb *crlBuilder) processRevocationQueue(sc *storageContext) error {
// See note in pki/backend.go; this should be empty.
cb.removalQueue.RemoveAll()
cb.haveInitializedQueue = true
cb.haveInitializedQueue.Store(true)
return nil
}
@ -744,18 +760,71 @@ func (cb *crlBuilder) processRevocationQueue(sc *storageContext) error {
cb.removalQueue.Remove(entry)
}
cb.haveInitializedQueue = true
cb.haveInitializedQueue.Store(true)
return nil
}
func (cb *crlBuilder) notifyOnConfigChange(sc *storageContext, priorConfig crlConfig, newConfig crlConfig) {
// If you need to hook into a CRL configuration change across different server types
// such as primary clusters as well as performance replicas, it is easier to do here than
// in two places (API layer and in invalidateFunc)
if priorConfig.UnifiedCRL != newConfig.UnifiedCRL && newConfig.UnifiedCRL {
sc.Backend.unifiedTransferStatus.forceRun()
func (cb *crlBuilder) processCrossClusterRevocations(sc *storageContext) error {
sc.Backend.Logger().Debug(fmt.Sprintf("starting to process unified revocations"))
crlConfig, err := cb.getConfigWithUpdate(sc)
if err != nil {
return err
}
if !crlConfig.UnifiedCRL {
cb.crossQueue.RemoveAll()
return nil
}
crossQueue := cb.crossQueue.Iterate()
sc.Backend.Logger().Debug(fmt.Sprintf("gathered %v unified revocations entries", len(crossQueue)))
ourClusterId, err := sc.Backend.System().ClusterID(sc.Context)
if err != nil {
return fmt.Errorf("unable to fetch clusterID to ignore local unified revocation entries: %w", err)
}
for _, req := range crossQueue {
// Regardless of whether we're on the perf primary or a secondary
// cluster, we can safely ignore revocation requests originating
// from our node, because we've already checked them once (when
// they were created).
if ourClusterId != "" && ourClusterId == req.Cluster {
continue
}
// Fetch the revocation entry to ensure it exists and this wasn't
// a delete.
rPath := unifiedRevocationReadPathPrefix + req.Cluster + "/" + req.Serial
entry, err := sc.Storage.Get(sc.Context, rPath)
if err != nil {
return fmt.Errorf("failed to read unified revocation entry: %w", err)
}
if entry == nil {
// Skip this entry: it was likely caused by the deletion of this
// record during tidy.
cb.crossQueue.Remove(req)
continue
}
resp, err := tryRevokeCertBySerial(sc, crlConfig, req.Serial)
if err == nil && resp != nil && !resp.IsError() && resp.Data != nil && resp.Data["state"].(string) == "revoked" {
// We could theoretically save ourselves from writing a global
// revocation entry during the above certificate revocation, as
// we don't really need it to appear on either the unified CRL
// or its delta CRL, but this would require more plumbing.
cb.crossQueue.Remove(req)
} else if err != nil {
// Because we fake being from a lease, we get the guarantee that
// err == nil == resp if the cert was already revoked; this means
// this err should actually be fatal.
return err
}
}
return nil
}
// Helper function to fetch a map of issuerID->parsed cert for revocation
@ -805,6 +874,10 @@ func fetchIssuerMapForRevocationChecking(sc *storageContext) (map[issuerID]*x509
// Revoke a certificate from a given serial number if it is present in local
// storage.
func tryRevokeCertBySerial(sc *storageContext, config *crlConfig, serial string) (*logical.Response, error) {
// revokeCert requires us to hold these locks before calling it.
sc.Backend.revokeStorageLock.Lock()
defer sc.Backend.revokeStorageLock.Unlock()
certEntry, err := fetchCertBySerial(sc, "certs/", serial)
if err != nil {
switch err.(type) {