Avoid deadlocking on stateLock in emitMetrics (#15693)

When stopCh is closed, metricsLoop should stop trying to acquire stateLock rather than keep waiting for it.
This commit is contained in:
Nick Cabatoff 2022-05-31 12:15:39 -04:00 committed by GitHub
parent 4aac96238c
commit 69c5e8c946
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 7 deletions

3
changelog/15693.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
core: Prevent metrics generation from causing deadlocks.
```

View File

@ -18,9 +18,20 @@ import (
func (c *Core) metricsLoop(stopCh chan struct{}) {
emitTimer := time.Tick(time.Second)
// stopOrHAState tries to take the state read lock through grabLockOrStop,
// handing it stopCh so the attempt can be abandoned. It returns (true, 0)
// when grabLockOrStop reports stopped; otherwise it returns
// (false, c.HAState()), reading the HA state while the read lock is held.
stopOrHAState := func() (bool, consts.HAState) {
stopped := grabLockOrStop(c.stateLock.RLock, c.stateLock.RUnlock, stopCh)
if stopped {
// No unlock on this path — presumably grabLockOrStop does not leave the
// lock held when it reports stopped (confirm against its definition).
// The 0 is a placeholder; the callers in this loop return without using it.
return true, 0
}
// Release the read lock once c.HAState() has been evaluated.
defer c.stateLock.RUnlock()
return false, c.HAState()
}
identityCountTimer := time.Tick(time.Minute * 10)
// Only emit on active node of cluster that is not a DR secondary.
if standby, _ := c.Standby(); standby || c.IsDRSecondary() {
if stopped, haState := stopOrHAState(); stopped {
return
} else if haState == consts.Standby || c.IsDRSecondary() {
identityCountTimer = nil
}
@ -38,7 +49,11 @@ func (c *Core) metricsLoop(stopCh chan struct{}) {
for {
select {
case <-emitTimer:
if !c.PerfStandby() {
stopped, haState := stopOrHAState()
if stopped {
return
}
if haState == consts.Active {
c.metricsMutex.Lock()
// Emit on active node only
if c.expiration != nil {
@ -55,13 +70,13 @@ func (c *Core) metricsLoop(stopCh chan struct{}) {
}
// Refresh the standby gauge, on all nodes
if standby, _ := c.Standby(); standby {
if haState != consts.Active {
c.metricSink.SetGaugeWithLabels([]string{"core", "active"}, 0, nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "active"}, 1, nil)
}
if perfStandby := c.PerfStandby(); perfStandby {
if haState == consts.PerfStandby {
c.metricSink.SetGaugeWithLabels([]string{"core", "performance_standby"}, 1, nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "performance_standby"}, 0, nil)
@ -103,9 +118,7 @@ func (c *Core) metricsLoop(stopCh chan struct{}) {
}
case <-writeTimer:
if stopped := grabLockOrStop(c.stateLock.RLock, c.stateLock.RUnlock, stopCh); stopped {
// Go through the loop again, this time the stop channel case
// should trigger
continue
return
}
// Ship barrier encryption counts if a perf standby or the active node
// on a performance secondary cluster