telemetry: prevent stale values from cert monitors

Prometheus scrapes metrics from each process, so when leadership transfers to a different node,
the previous leader keeps reporting its old cached value.

By setting the gauge to NaN when the monitor exits, I believe we effectively zero-out the value,
so that Prometheus should only consider the value from the new leader.
This commit is contained in:
Daniel Nephin 2021-08-05 18:38:06 -04:00
parent 616cc9b6f8
commit 9de725c17d
1 changed files with 4 additions and 0 deletions

View File

@ -5,6 +5,7 @@ import (
"crypto/x509"
"errors"
"fmt"
"math"
"strings"
"time"
@ -156,6 +157,9 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
for {
select {
case <-ctx.Done():
// "Zero-out" the metric on exit so that when prometheus scrapes this
// metric from a non-leader, it does not get a stale value.
metrics.SetGauge(m.Key, float32(math.NaN()))
return nil
case <-ticker.C:
fn()