Rework locking within the PKI CRLBuilder (#15325)

- Do not grab a lock within the requestRebuildIfActiveNode function
   to avoid issues being called from the invalidate function
 - Leverage more atmoic operations, and only grab the lock if we are
   going to perform the rebuild.
This commit is contained in:
Steven Clark 2022-05-11 13:14:21 -04:00 committed by GitHub
parent 48f3279c49
commit 20a78a2118
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 7 deletions

View File

@ -154,6 +154,52 @@ func TestBackend_Secondary_CRL_Rebuilding(t *testing.T) {
require.Equal(t, 0, len(crl.RevokedCertificates))
}
func TestCrlRebuilder(t *testing.T) {
ctx := context.Background()
b, s := createBackendWithStorage(t)
mkc := newManagedKeyContext(ctx, b, "test")
// Write out the issuer/key to storage without going through the api call as replication would.
bundle := genCertBundle(t, b, s)
_, _, err := writeCaBundle(mkc, s, bundle, "", "")
require.NoError(t, err)
req := &logical.Request{Storage: s}
cb := crlBuilder{}
// Force an initial build
err = cb.rebuild(ctx, b, req, true)
require.NoError(t, err, "Failed to rebuild CRL")
resp := requestCrlFromBackend(t, s, b)
crl1 := parseCrlPemBytes(t, resp.Data["http_raw_body"].([]byte))
// We shouldn't rebuild within this call.
err = cb.rebuildIfForced(ctx, b, req)
require.NoError(t, err, "Failed to rebuild if forced CRL")
resp = requestCrlFromBackend(t, s, b)
crl2 := parseCrlPemBytes(t, resp.Data["http_raw_body"].([]byte))
require.Equal(t, crl1.ThisUpdate, crl2.ThisUpdate, "According to the update field, we rebuilt the CRL")
// Make sure we have ticked over to the next second
for {
diff := time.Now().Sub(crl1.ThisUpdate)
if diff.Seconds() >= 1 {
break
}
time.Sleep(100 * time.Millisecond)
}
// This should rebuild the CRL
cb.requestRebuildIfActiveNode(b)
err = cb.rebuildIfForced(ctx, b, req)
require.NoError(t, err, "Failed to rebuild if forced CRL")
resp = requestCrlFromBackend(t, s, b)
crl3 := parseCrlPemBytes(t, resp.Data["http_raw_body"].([]byte))
require.True(t, crl1.ThisUpdate.Before(crl3.ThisUpdate),
"initial crl time: %#v not before next crl rebuild time: %#v", crl1.ThisUpdate, crl3.ThisUpdate)
}
func requestCrlFromBackend(t *testing.T, s logical.Storage, b *backend) *logical.Response {
crlReq := &logical.Request{
Operation: logical.ReadOperation,

View File

@ -61,7 +61,9 @@ func (cb *crlBuilder) rebuild(ctx context.Context, b *backend, request *logical.
// requestRebuildIfActiveNode will schedule a rebuild of the CRL from the next read or write api call assuming we are the active node of a cluster
func (cb *crlBuilder) requestRebuildIfActiveNode(b *backend) {
// Only schedule us on active nodes, ignoring secondary nodes, the active can/should rebuild the CRL.
// Only schedule us on active nodes, as the active node is the only node that can rebuild/write the CRL.
// Note 1: The CRL is cluster specific, so this does need to run on the active node of a performance secondary cluster.
// Note 2: This is called by the storage invalidation function, so it should not block.
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) ||
b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) {
b.Logger().Debug("Ignoring request to schedule a CRL rebuild, not on active node.")
@ -69,19 +71,24 @@ func (cb *crlBuilder) requestRebuildIfActiveNode(b *backend) {
}
b.Logger().Info("Scheduling PKI CRL rebuild.")
cb.m.Lock()
defer cb.m.Unlock()
atomic.StoreUint32(&cb.forceRebuild, 1)
// Set the flag to 1, we don't care if we aren't the ones that actually swap it to 1.
atomic.CompareAndSwapUint32(&cb.forceRebuild, 0, 1)
}
func (cb *crlBuilder) _doRebuild(ctx context.Context, b *backend, request *logical.Request, forceNew bool, ignoreForceFlag bool) error {
cb.m.Lock()
defer cb.m.Unlock()
if cb.forceRebuild == 1 || ignoreForceFlag {
defer atomic.StoreUint32(&cb.forceRebuild, 0)
// Re-read the lock in case someone beat us to the punch between the previous load op.
forceBuildFlag := atomic.LoadUint32(&cb.forceRebuild)
if forceBuildFlag == 1 || ignoreForceFlag {
// Reset our original flag back to 0 before we start the rebuilding. This may lead to another round of
// CRL building, but we want to avoid the race condition caused by clearing the flag after we completed (An
// update/revocation occurred attempting to set the flag, after we listed the certs but before we wrote
// the CRL, so we missed the update and cleared the flag.)
atomic.CompareAndSwapUint32(&cb.forceRebuild, 1, 0)
// if forceRebuild was requested, that should force a complete rebuild even if requested not too by forceNew
myForceNew := cb.forceRebuild == 1 || forceNew
myForceNew := forceBuildFlag == 1 || forceNew
return buildCRLs(ctx, b, request, myForceNew)
}