telemetry: fix a couple bugs in cert expiry metrics

1. do not emit the metric if Query fails
2. properly check for PrimaryUsesIntermediate; the logic was inverted

Also improve the logging by including the metric name in the log message.
This commit is contained in:
Daniel Nephin 2021-08-04 13:26:36 -04:00
parent 1673b3a68c
commit 13aa7b70d5
1 changed files with 6 additions and 4 deletions

View File

@ -5,6 +5,7 @@ import (
"crypto/x509" "crypto/x509"
"errors" "errors"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
@ -73,12 +74,10 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
Query: func() (time.Duration, error) { Query: func() (time.Duration, error) {
provider, _ := s.caManager.getCAProvider() provider, _ := s.caManager.getCAProvider()
if _, ok := provider.(ca.PrimaryUsesIntermediate); !ok { if _, ok := provider.(ca.PrimaryUsesIntermediate); ok {
return getActiveIntermediateExpiry(s) return getActiveIntermediateExpiry(s)
} }
return getRootCAExpiry(s) return getRootCAExpiry(s)
}, },
} }
} }
@ -129,6 +128,8 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
ticker := time.NewTicker(certExpirationMonitorInterval) ticker := time.NewTicker(certExpirationMonitorInterval)
defer ticker.Stop() defer ticker.Stop()
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -136,7 +137,8 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
case <-ticker.C: case <-ticker.C:
d, err := m.Query() d, err := m.Query()
if err != nil { if err != nil {
m.Logger.Warn("failed to emit certificate expiry metric", "error", err) logger.Warn("failed to emit certificate expiry metric", "error", err)
continue
} }
expiry := d / time.Second expiry := d / time.Second
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels) metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)