2021-03-24 21:40:10 +00:00
|
|
|
package consul
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2021-08-04 17:05:10 +00:00
|
|
|
"crypto/x509"
|
2021-07-07 13:41:01 +00:00
|
|
|
"errors"
|
2021-03-24 21:40:10 +00:00
|
|
|
"fmt"
|
2021-08-04 17:26:36 +00:00
|
|
|
"strings"
|
2021-03-24 21:40:10 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/armon/go-metrics"
|
|
|
|
"github.com/armon/go-metrics/prometheus"
|
2021-07-07 13:41:01 +00:00
|
|
|
"github.com/hashicorp/go-hclog"
|
2021-08-04 17:05:10 +00:00
|
|
|
|
|
|
|
"github.com/hashicorp/consul/agent/connect"
|
|
|
|
"github.com/hashicorp/consul/agent/connect/ca"
|
|
|
|
"github.com/hashicorp/consul/logging"
|
|
|
|
"github.com/hashicorp/consul/tlsutil"
|
2021-03-24 21:40:10 +00:00
|
|
|
)
|
|
|
|
|
2021-07-07 13:41:01 +00:00
|
|
|
// Metric key paths for the service mesh CA expiry gauges.
var (
	// metricsKeyMeshRootCAExpiry is the key of the gauge that reports the
	// time remaining until the active root CA certificate expires.
	metricsKeyMeshRootCAExpiry = []string{
		"mesh", "active-root-ca", "expiry",
	}

	// metricsKeyMeshActiveSigningCAExpiry is the key of the gauge that
	// reports the time remaining until the active signing CA certificate
	// expires.
	metricsKeyMeshActiveSigningCAExpiry = []string{
		"mesh", "active-signing-ca", "expiry",
	}
)
|
|
|
|
|
2021-03-24 21:40:10 +00:00
|
|
|
var CertExpirationGauges = []prometheus.GaugeDefinition{
|
|
|
|
{
|
|
|
|
Name: metricsKeyMeshRootCAExpiry,
|
2021-07-07 13:41:01 +00:00
|
|
|
Help: "Seconds until the service mesh root certificate expires. Updated every hour",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Name: metricsKeyMeshActiveSigningCAExpiry,
|
|
|
|
Help: "Seconds until the service mesh signing certificate expires. Updated every hour",
|
2021-03-24 21:40:10 +00:00
|
|
|
},
|
2021-08-04 17:05:10 +00:00
|
|
|
{
|
|
|
|
Name: metricsKeyAgentTLSCertExpiry,
|
|
|
|
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
|
|
|
},
|
2021-03-24 21:40:10 +00:00
|
|
|
}
|
|
|
|
|
2021-08-04 17:05:10 +00:00
|
|
|
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
|
|
|
return CertExpirationMonitor{
|
2021-03-24 21:40:10 +00:00
|
|
|
Key: metricsKeyMeshRootCAExpiry,
|
|
|
|
Labels: []metrics.Label{
|
|
|
|
{Name: "datacenter", Value: s.config.Datacenter},
|
|
|
|
},
|
|
|
|
Logger: s.logger.Named(logging.Connect),
|
|
|
|
Query: func() (time.Duration, error) {
|
2021-07-07 13:41:01 +00:00
|
|
|
return getRootCAExpiry(s)
|
2021-03-24 21:40:10 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-07 13:41:01 +00:00
|
|
|
func getRootCAExpiry(s *Server) (time.Duration, error) {
|
|
|
|
state := s.fsm.State()
|
|
|
|
_, root, err := state.CARootActive(nil)
|
|
|
|
switch {
|
|
|
|
case err != nil:
|
|
|
|
return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
|
|
|
|
case root == nil:
|
|
|
|
return 0, fmt.Errorf("no active root CA")
|
|
|
|
}
|
|
|
|
|
|
|
|
return time.Until(root.NotAfter), nil
|
|
|
|
}
|
|
|
|
|
2021-08-04 17:05:10 +00:00
|
|
|
func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
2021-07-07 13:41:01 +00:00
|
|
|
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
|
|
|
if isPrimary {
|
2021-08-04 17:05:10 +00:00
|
|
|
return CertExpirationMonitor{
|
2021-07-07 13:41:01 +00:00
|
|
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
|
|
|
Labels: []metrics.Label{
|
|
|
|
{Name: "datacenter", Value: s.config.Datacenter},
|
|
|
|
},
|
|
|
|
Logger: s.logger.Named(logging.Connect),
|
|
|
|
Query: func() (time.Duration, error) {
|
|
|
|
provider, _ := s.caManager.getCAProvider()
|
|
|
|
|
2021-08-04 17:26:36 +00:00
|
|
|
if _, ok := provider.(ca.PrimaryUsesIntermediate); ok {
|
2021-07-07 13:41:01 +00:00
|
|
|
return getActiveIntermediateExpiry(s)
|
|
|
|
}
|
|
|
|
return getRootCAExpiry(s)
|
|
|
|
},
|
|
|
|
}
|
2021-08-04 17:05:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return CertExpirationMonitor{
|
|
|
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
|
|
|
Labels: []metrics.Label{
|
|
|
|
{Name: "datacenter", Value: s.config.Datacenter},
|
|
|
|
},
|
|
|
|
Logger: s.logger.Named(logging.Connect),
|
|
|
|
Query: func() (time.Duration, error) {
|
|
|
|
return getActiveIntermediateExpiry(s)
|
|
|
|
},
|
2021-07-07 13:41:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
|
|
|
state := s.fsm.State()
|
|
|
|
_, root, err := state.CARootActive(nil)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// the CA used in a secondary DC is the active intermediate,
|
|
|
|
// which is the last in the IntermediateCerts stack
|
|
|
|
if len(root.IntermediateCerts) == 0 {
|
|
|
|
return 0, errors.New("no intermediate available")
|
|
|
|
}
|
|
|
|
cert, err := connect.ParseCert(root.IntermediateCerts[len(root.IntermediateCerts)-1])
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return time.Until(cert.NotAfter), nil
|
|
|
|
}
|
|
|
|
|
2021-08-04 17:05:10 +00:00
|
|
|
type CertExpirationMonitor struct {
|
2021-03-24 21:40:10 +00:00
|
|
|
Key []string
|
|
|
|
Labels []metrics.Label
|
|
|
|
Logger hclog.Logger
|
|
|
|
// Query is called at each interval. It should return the duration until the
|
|
|
|
// certificate expires, or an error if the query failed.
|
|
|
|
Query func() (time.Duration, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// certExpirationMonitorInterval is the period of the ticker that drives
// CertExpirationMonitor.Monitor: one query and gauge update per interval.
const certExpirationMonitorInterval = 1 * time.Hour
|
|
|
|
|
2021-08-04 17:05:10 +00:00
|
|
|
func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
2021-03-24 21:40:10 +00:00
|
|
|
ticker := time.NewTicker(certExpirationMonitorInterval)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
2021-08-04 17:26:36 +00:00
|
|
|
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
|
|
|
|
2021-03-24 21:40:10 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
|
|
|
case <-ticker.C:
|
|
|
|
d, err := m.Query()
|
|
|
|
if err != nil {
|
2021-08-04 17:26:36 +00:00
|
|
|
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
|
|
|
continue
|
2021-03-24 21:40:10 +00:00
|
|
|
}
|
2021-08-04 18:18:59 +00:00
|
|
|
|
|
|
|
if d < 24*time.Hour {
|
|
|
|
logger.Warn("certificate will expire soon",
|
|
|
|
"time_to_expiry", d, "expiration", time.Now().Add(d))
|
|
|
|
}
|
|
|
|
|
2021-03-24 21:40:10 +00:00
|
|
|
expiry := d / time.Second
|
|
|
|
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-08-04 17:05:10 +00:00
|
|
|
|
|
|
|
// metricsKeyAgentTLSCertExpiry is the key of the gauge that reports the time
// remaining until the agent TLS certificate expires.
var metricsKeyAgentTLSCertExpiry = []string{
	"agent",
	"tls",
	"cert",
	"expiry",
}
|
|
|
|
|
|
|
|
// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
|
|
|
|
// monitor the expiration of the certificate used for agent TLS.
|
|
|
|
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
|
|
|
|
return CertExpirationMonitor{
|
|
|
|
Key: metricsKeyAgentTLSCertExpiry,
|
|
|
|
Labels: []metrics.Label{
|
|
|
|
{Name: "node", Value: c.Base().NodeName},
|
|
|
|
{Name: "datacenter", Value: dc},
|
|
|
|
},
|
|
|
|
Logger: logger,
|
|
|
|
Query: func() (time.Duration, error) {
|
|
|
|
raw := c.Cert()
|
|
|
|
if raw == nil {
|
|
|
|
return 0, fmt.Errorf("tls not enabled")
|
|
|
|
}
|
|
|
|
|
|
|
|
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
|
|
|
if err != nil {
|
|
|
|
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
|
|
|
}
|
|
|
|
return time.Until(cert.NotAfter), nil
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|