telemetry: improve cert expiry metrics
Emit the metric immediately so that after restarting an agent, the new expiry time will be emitted. This is particularly important when this metric is being monitored, because we want the alert to resolve itself immediately. Also fixed a bug that was exposed in one of these metrics. The CARoot can be nil, so we have to handle that case.
This commit is contained in:
parent
ae76144f55
commit
616cc9b6f8
|
@ -97,8 +97,11 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||||
func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
||||||
state := s.fsm.State()
|
state := s.fsm.State()
|
||||||
_, root, err := state.CARootActive(nil)
|
_, root, err := state.CARootActive(nil)
|
||||||
if err != nil {
|
switch {
|
||||||
return 0, err
|
case err != nil:
|
||||||
|
return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
|
||||||
|
case root == nil:
|
||||||
|
return 0, fmt.Errorf("no active root CA")
|
||||||
}
|
}
|
||||||
|
|
||||||
// the CA used in a secondary DC is the active intermediate,
|
// the CA used in a secondary DC is the active intermediate,
|
||||||
|
@ -130,24 +133,32 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
||||||
|
|
||||||
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
||||||
|
|
||||||
|
fn := func() {
|
||||||
|
d, err := m.Query()
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if d < 24*time.Hour {
|
||||||
|
logger.Warn("certificate will expire soon",
|
||||||
|
"time_to_expiry", d, "expiration", time.Now().Add(d))
|
||||||
|
}
|
||||||
|
|
||||||
|
expiry := d / time.Second
|
||||||
|
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
|
||||||
|
}
|
||||||
|
|
||||||
|
// emit the metric immediately so that if a cert was just updated the
|
||||||
|
// new metric will be updated to the new expiration time.
|
||||||
|
fn()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return nil
|
return nil
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
d, err := m.Query()
|
fn()
|
||||||
if err != nil {
|
|
||||||
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if d < 24*time.Hour {
|
|
||||||
logger.Warn("certificate will expire soon",
|
|
||||||
"time_to_expiry", d, "expiration", time.Now().Add(d))
|
|
||||||
}
|
|
||||||
|
|
||||||
expiry := d / time.Second
|
|
||||||
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue