From 69c5e8c9468d2c7b6f1b02c834302e57291bbc87 Mon Sep 17 00:00:00 2001 From: Nick Cabatoff Date: Tue, 31 May 2022 12:15:39 -0400 Subject: [PATCH] Avoid deadlocking on stateLock in emitMetrics (#15693) When stopCh is closed we should stop trying to get the lock. --- changelog/15693.txt | 3 +++ vault/core_metrics.go | 27 ++++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 changelog/15693.txt diff --git a/changelog/15693.txt b/changelog/15693.txt new file mode 100644 index 000000000..7426c15c3 --- /dev/null +++ b/changelog/15693.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Prevent metrics generation from causing deadlocks. +``` diff --git a/vault/core_metrics.go b/vault/core_metrics.go index acf021a52..18c943deb 100644 --- a/vault/core_metrics.go +++ b/vault/core_metrics.go @@ -18,9 +18,20 @@ import ( func (c *Core) metricsLoop(stopCh chan struct{}) { emitTimer := time.Tick(time.Second) + stopOrHAState := func() (bool, consts.HAState) { + stopped := grabLockOrStop(c.stateLock.RLock, c.stateLock.RUnlock, stopCh) + if stopped { + return true, 0 + } + defer c.stateLock.RUnlock() + return false, c.HAState() + } + identityCountTimer := time.Tick(time.Minute * 10) // Only emit on active node of cluster that is not a DR secondary. - if standby, _ := c.Standby(); standby || c.IsDRSecondary() { + if stopped, haState := stopOrHAState(); stopped { + return + } else if haState == consts.Standby || c.IsDRSecondary() { identityCountTimer = nil } @@ -38,7 +49,11 @@ func (c *Core) metricsLoop(stopCh chan struct{}) { for { select { case <-emitTimer: - if !c.PerfStandby() { + stopped, haState := stopOrHAState() + if stopped { + return + } + if haState == consts.Active { c.metricsMutex.Lock() // Emit on active node only if c.expiration != nil { @@ -55,13 +70,13 @@ func (c *Core) metricsLoop(stopCh chan struct{}) { } // Refresh the standby gauge, on all nodes - if standby, _ := c.Standby(); standby { + if haState != consts.Active { c.metricSink.SetGaugeWithLabels([]string{"core", "active"}, 0, nil) } else { c.metricSink.SetGaugeWithLabels([]string{"core", "active"}, 1, nil) } - if perfStandby := c.PerfStandby(); perfStandby { + if haState == consts.PerfStandby { c.metricSink.SetGaugeWithLabels([]string{"core", "performance_standby"}, 1, nil) } else { c.metricSink.SetGaugeWithLabels([]string{"core", "performance_standby"}, 0, nil) @@ -103,9 +118,7 @@ func (c *Core) metricsLoop(stopCh chan struct{}) { } case <-writeTimer: if stopped := grabLockOrStop(c.stateLock.RLock, c.stateLock.RUnlock, stopCh); stopped { - // Go through the loop again, this time the stop channel case - // should trigger - continue + return } // Ship barrier encryption counts if a perf standby or the active node // on a performance secondary cluster