From 1f21afba2149edf0b660282e0c9ccd0a027b1056 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Mon, 22 May 2023 14:31:19 -0400 Subject: [PATCH] Fix race in PKI's runUnifiedTransfer (#20701) * Fix race in PKI's runUnifiedTransfer During this race, we'll sometimes start (or fail to start) an additional unified transfer if the updated last run timestamp was written at the same time as another thread was reading it. Instead, delay this check until we're holding the CAS guard; this will occasionally result in more messages saying that an existing process is already running, but otherwise shouldn't impact the functionality at all. Signed-off-by: Alexander Scheel * Add changelog entry Signed-off-by: Alexander Scheel --------- Signed-off-by: Alexander Scheel --- builtin/logical/pki/periodic.go | 20 +++++++++++--------- changelog/20701.txt | 3 +++ 2 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 changelog/20701.txt diff --git a/builtin/logical/pki/periodic.go b/builtin/logical/pki/periodic.go index b8119e0cd..77ff31212 100644 --- a/builtin/logical/pki/periodic.go +++ b/builtin/logical/pki/periodic.go @@ -53,15 +53,6 @@ func runUnifiedTransfer(sc *storageContext) { return } - if !status.lastRun.IsZero() { - // We have run before, we only run again if we have - // been requested to forceRerun, and we haven't run since our - // minimum delay - if !(status.forceRerun.Load() && time.Since(status.lastRun) < minUnifiedTransferDelay) { - return - } - } - if !config.UnifiedCRL { // Feature is disabled, no need to run return @@ -80,6 +71,17 @@ func runUnifiedTransfer(sc *storageContext) { } defer status.isRunning.Store(false) + // Because access to lastRun is not locked, we need to delay this check + // until after we grab the isRunning CAS lock. + if !status.lastRun.IsZero() { + // We have run before, we only run again if we have + // been requested to forceRerun, and we haven't run since our + // minimum delay. + if !(status.forceRerun.Load() && time.Since(status.lastRun) < minUnifiedTransferDelay) { + return + } + } + // Reset our flag before we begin, we do this before we start as // we can't guarantee that we can properly parse/fix the error from an // error that comes in from the revoke API after that. This will diff --git a/changelog/20701.txt b/changelog/20701.txt new file mode 100644 index 000000000..24942d5d0 --- /dev/null +++ b/changelog/20701.txt @@ -0,0 +1,3 @@ +```release-notes:bug +secrets/pki: Fix race during runUnifiedTransfer when deciding to skip re-running a test within a short window. +```