Add ca certificate metrics (#10504)
* add intermediate ca metric routine * add Gauge config for intermediate cert * Stop metrics routine when stopping leader * add changelog entry * update changelog Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use variables instead of a map * go imports sort * Add metrics for primary and secondary ca * start metrics routine in the right DC * add telemetry documentation * update docs * extract expiry fetching in a func * merge metrics for primary and secondary into signing ca metric Co-authored-by: Daniel Nephin <dnephin@hashicorp.com>
This commit is contained in:
parent
83c543cd6b
commit
e5dbf5e55b
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:enhancement
|
||||||
|
telemetry: added metrics to track certificate expiry.
|
||||||
|
```
|
|
@ -37,6 +37,7 @@ func (s *Server) startConnectLeader(ctx context.Context) error {
|
||||||
s.caManager.Start(ctx)
|
s.caManager.Start(ctx)
|
||||||
s.leaderRoutineManager.Start(ctx, caRootPruningRoutineName, s.runCARootPruning)
|
s.leaderRoutineManager.Start(ctx, caRootPruningRoutineName, s.runCARootPruning)
|
||||||
s.leaderRoutineManager.Start(ctx, caRootMetricRoutineName, rootCAExpiryMonitor(s).monitor)
|
s.leaderRoutineManager.Start(ctx, caRootMetricRoutineName, rootCAExpiryMonitor(s).monitor)
|
||||||
|
s.leaderRoutineManager.Start(ctx, caSigningMetricRoutineName, signingCAExpiryMonitor(s).monitor)
|
||||||
|
|
||||||
return s.startIntentionConfigEntryMigration(ctx)
|
return s.startIntentionConfigEntryMigration(ctx)
|
||||||
}
|
}
|
||||||
|
@ -46,6 +47,8 @@ func (s *Server) stopConnectLeader() {
|
||||||
s.caManager.Stop()
|
s.caManager.Stop()
|
||||||
s.leaderRoutineManager.Stop(intentionMigrationRoutineName)
|
s.leaderRoutineManager.Stop(intentionMigrationRoutineName)
|
||||||
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
|
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
|
||||||
|
s.leaderRoutineManager.Stop(caRootMetricRoutineName)
|
||||||
|
s.leaderRoutineManager.Stop(caSigningMetricRoutineName)
|
||||||
|
|
||||||
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
|
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
|
||||||
provider, _ := s.caManager.getCAProvider()
|
provider, _ := s.caManager.getCAProvider()
|
||||||
|
|
|
@ -2,25 +2,34 @@ package consul
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/connect/ca"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/connect"
|
||||||
|
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
"github.com/armon/go-metrics/prometheus"
|
"github.com/armon/go-metrics/prometheus"
|
||||||
"github.com/hashicorp/go-hclog"
|
|
||||||
|
|
||||||
"github.com/hashicorp/consul/logging"
|
"github.com/hashicorp/consul/logging"
|
||||||
|
"github.com/hashicorp/go-hclog"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
||||||
|
var metricsKeyMeshActiveSigningCAExpiry = []string{"mesh", "active-signing-ca", "expiry"}
|
||||||
|
|
||||||
var CertExpirationGauges = []prometheus.GaugeDefinition{
|
var CertExpirationGauges = []prometheus.GaugeDefinition{
|
||||||
{
|
{
|
||||||
Name: metricsKeyMeshRootCAExpiry,
|
Name: metricsKeyMeshRootCAExpiry,
|
||||||
Help: "Seconds until the service mesh root certificate expires.",
|
Help: "Seconds until the service mesh root certificate expires. Updated every hour",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
|
Help: "Seconds until the service mesh signing certificate expires. Updated every hour",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
|
||||||
|
|
||||||
func rootCAExpiryMonitor(s *Server) certExpirationMonitor {
|
func rootCAExpiryMonitor(s *Server) certExpirationMonitor {
|
||||||
return certExpirationMonitor{
|
return certExpirationMonitor{
|
||||||
Key: metricsKeyMeshRootCAExpiry,
|
Key: metricsKeyMeshRootCAExpiry,
|
||||||
|
@ -29,20 +38,77 @@ func rootCAExpiryMonitor(s *Server) certExpirationMonitor {
|
||||||
},
|
},
|
||||||
Logger: s.logger.Named(logging.Connect),
|
Logger: s.logger.Named(logging.Connect),
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
state := s.fsm.State()
|
return getRootCAExpiry(s)
|
||||||
_, root, err := state.CARootActive(nil)
|
|
||||||
switch {
|
|
||||||
case err != nil:
|
|
||||||
return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
|
|
||||||
case root == nil:
|
|
||||||
return 0, fmt.Errorf("no active root CA")
|
|
||||||
}
|
|
||||||
|
|
||||||
return time.Until(root.NotAfter), nil
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getRootCAExpiry(s *Server) (time.Duration, error) {
|
||||||
|
state := s.fsm.State()
|
||||||
|
_, root, err := state.CARootActive(nil)
|
||||||
|
switch {
|
||||||
|
case err != nil:
|
||||||
|
return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
|
||||||
|
case root == nil:
|
||||||
|
return 0, fmt.Errorf("no active root CA")
|
||||||
|
}
|
||||||
|
|
||||||
|
return time.Until(root.NotAfter), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func signingCAExpiryMonitor(s *Server) certExpirationMonitor {
|
||||||
|
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
||||||
|
if isPrimary {
|
||||||
|
return certExpirationMonitor{
|
||||||
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
|
Labels: []metrics.Label{
|
||||||
|
{Name: "datacenter", Value: s.config.Datacenter},
|
||||||
|
},
|
||||||
|
Logger: s.logger.Named(logging.Connect),
|
||||||
|
Query: func() (time.Duration, error) {
|
||||||
|
provider, _ := s.caManager.getCAProvider()
|
||||||
|
|
||||||
|
if _, ok := provider.(ca.PrimaryUsesIntermediate); !ok {
|
||||||
|
return getActiveIntermediateExpiry(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
return getRootCAExpiry(s)
|
||||||
|
|
||||||
|
},
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return certExpirationMonitor{
|
||||||
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
|
Labels: []metrics.Label{
|
||||||
|
{Name: "datacenter", Value: s.config.Datacenter},
|
||||||
|
},
|
||||||
|
Logger: s.logger.Named(logging.Connect),
|
||||||
|
Query: func() (time.Duration, error) {
|
||||||
|
return getActiveIntermediateExpiry(s)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
||||||
|
state := s.fsm.State()
|
||||||
|
_, root, err := state.CARootActive(nil)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// the CA used in a secondary DC is the active intermediate,
|
||||||
|
// which is the last in the IntermediateCerts stack
|
||||||
|
if len(root.IntermediateCerts) == 0 {
|
||||||
|
return 0, errors.New("no intermediate available")
|
||||||
|
}
|
||||||
|
cert, err := connect.ParseCert(root.IntermediateCerts[len(root.IntermediateCerts)-1])
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return time.Until(cert.NotAfter), nil
|
||||||
|
}
|
||||||
|
|
||||||
type certExpirationMonitor struct {
|
type certExpirationMonitor struct {
|
||||||
Key []string
|
Key []string
|
||||||
Labels []metrics.Label
|
Labels []metrics.Label
|
||||||
|
|
|
@ -103,6 +103,7 @@ const (
|
||||||
aclUpgradeRoutineName = "legacy ACL token upgrade"
|
aclUpgradeRoutineName = "legacy ACL token upgrade"
|
||||||
caRootPruningRoutineName = "CA root pruning"
|
caRootPruningRoutineName = "CA root pruning"
|
||||||
caRootMetricRoutineName = "CA root expiration metric"
|
caRootMetricRoutineName = "CA root expiration metric"
|
||||||
|
caSigningMetricRoutineName = "CA signing expiration metric"
|
||||||
configReplicationRoutineName = "config entry replication"
|
configReplicationRoutineName = "config entry replication"
|
||||||
federationStateReplicationRoutineName = "federation state replication"
|
federationStateReplicationRoutineName = "federation state replication"
|
||||||
federationStateAntiEntropyRoutineName = "federation state anti-entropy"
|
federationStateAntiEntropyRoutineName = "federation state anti-entropy"
|
||||||
|
|
|
@ -479,6 +479,7 @@ These metrics give insight into the health of the cluster as a whole.
|
||||||
| `consul.catalog.connect.query-tags..` | Increments for each connect-based catalog query for the given service with the given tags. | queries | counter |
|
| `consul.catalog.connect.query-tags..` | Increments for each connect-based catalog query for the given service with the given tags. | queries | counter |
|
||||||
| `consul.catalog.connect.not-found.` | Increments for each connect-based catalog query where the given service could not be found. | queries | counter |
|
| `consul.catalog.connect.not-found.` | Increments for each connect-based catalog query where the given service could not be found. | queries | counter |
|
||||||
| `consul.mesh.active-root-ca.expiry` | The number of seconds until the root CA expires, updated every hour. | seconds | gauge |
|
| `consul.mesh.active-root-ca.expiry` | The number of seconds until the root CA expires, updated every hour. | seconds | gauge |
|
||||||
|
| `consul.mesh.active-signing-ca.expiry` | The number of seconds until the signing CA expires, updated every hour. | seconds | gauge |
|
||||||
|
|
||||||
## Connect Built-in Proxy Metrics
|
## Connect Built-in Proxy Metrics
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue