270 lines
8.2 KiB
Go
270 lines
8.2 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package servercert
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/agent/cache"
|
|
cachetype "github.com/hashicorp/consul/agent/cache-types"
|
|
"github.com/hashicorp/consul/agent/connect"
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
"github.com/hashicorp/consul/lib/retry"
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/go-memdb"
|
|
)
|
|
|
|
// leafWatchID is the correlation ID attached to the leaf certificate cache
// watch. Update events are matched against it before they are applied.
const leafWatchID = "leaf"
|
|
|
|
// Cache is an interface to represent the necessary methods of the agent/cache.Cache.
|
|
// It is used to request and renew the server leaf certificate.
|
|
type Cache interface {
|
|
Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error
|
|
}
|
|
|
|
// TLSConfigurator captures the subset of tlsutil.Configurator that the
// certificate manager depends on. It is the sink for the server leaf
// certificate and for the peering server name.
type TLSConfigurator interface {
	// UpdateAutoTLSCert receives the leaf certificate and its private key.
	// The certificate manager passes both as PEM strings.
	UpdateAutoTLSCert(pub, priv string) error

	// UpdateAutoTLSPeeringServerName receives the server name derived from
	// the CA configuration.
	UpdateAutoTLSPeeringServerName(name string)
}
|
|
|
|
// Store is an interface to represent the necessary methods of the state.Store.
|
|
// It is used to fetch the CA Config to getStore the trust domain in the TLSConfigurator.
|
|
type Store interface {
|
|
CAConfig(ws memdb.WatchSet) (uint64, *structs.CAConfiguration, error)
|
|
SystemMetadataGet(ws memdb.WatchSet, key string) (uint64, *structs.SystemMetadataEntry, error)
|
|
AbandonCh() <-chan struct{}
|
|
}
|
|
|
|
// Config holds the agent settings that the certificate manager needs.
type Config struct {
	// Datacenter is the name of the datacenter this server is configured with.
	// It is sent in leaf certificate requests and used to build the peering
	// server name.
	Datacenter string

	// ACLsEnabled reports whether the ACL system is enabled on this server.
	// When true, the leaf certificate watch follows the server management token.
	ACLsEnabled bool
}
|
|
|
|
type Deps struct {
|
|
Config Config
|
|
Logger hclog.Logger
|
|
Cache Cache
|
|
GetStore func() Store
|
|
TLSConfigurator TLSConfigurator
|
|
waiter retry.Waiter
|
|
}
|
|
|
|
// CertManager is responsible for requesting and renewing the leaf cert for server agents.
|
|
// The server certificate is managed internally and used for peering control-plane traffic
|
|
// to the TLS-enabled external gRPC port.
|
|
type CertManager struct {
|
|
logger hclog.Logger
|
|
|
|
// config contains agent configuration necessary for the cert manager to operate.
|
|
config Config
|
|
|
|
// cache provides an API to issue internal RPC requests and receive notifications
|
|
// when there are changes.
|
|
cache Cache
|
|
|
|
// cacheUpdateCh receives notifications of cache update events for resources watched.
|
|
cacheUpdateCh chan cache.UpdateEvent
|
|
|
|
// getStore returns the server state getStore for read-only access.
|
|
getStore func() Store
|
|
|
|
// tlsConfigurator receives the leaf cert and peering server name updates from the cert manager.
|
|
tlsConfigurator TLSConfigurator
|
|
|
|
// waiter contains the waiter for exponential backoff between retries.
|
|
waiter retry.Waiter
|
|
}
|
|
|
|
func NewCertManager(deps Deps) *CertManager {
|
|
return &CertManager{
|
|
config: deps.Config,
|
|
logger: deps.Logger,
|
|
cache: deps.Cache,
|
|
cacheUpdateCh: make(chan cache.UpdateEvent, 1),
|
|
getStore: deps.GetStore,
|
|
tlsConfigurator: deps.TLSConfigurator,
|
|
waiter: retry.Waiter{
|
|
MinFailures: 1,
|
|
MinWait: 1 * time.Second,
|
|
MaxWait: 5 * time.Minute,
|
|
Jitter: retry.NewJitter(20),
|
|
},
|
|
}
|
|
}
|
|
|
|
func (m *CertManager) Start(ctx context.Context) error {
|
|
if err := m.initializeWatches(ctx); err != nil {
|
|
return fmt.Errorf("failed to set up certificate watches: %w", err)
|
|
}
|
|
go m.handleUpdates(ctx)
|
|
|
|
m.logger.Info("initialized server certificate management")
|
|
return nil
|
|
}
|
|
|
|
func (m *CertManager) initializeWatches(ctx context.Context) error {
|
|
if m.config.ACLsEnabled {
|
|
// If ACLs are enabled we need to watch for server token updates and set/reset
|
|
// leaf cert updates as token updates arrive.
|
|
go m.watchServerToken(ctx)
|
|
} else {
|
|
// If ACLs are disabled we set up a single cache notification for leaf certs.
|
|
if err := m.watchLeafCert(ctx); err != nil {
|
|
return fmt.Errorf("failed to watch leaf: %w", err)
|
|
}
|
|
}
|
|
go m.watchCAConfig(ctx)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (m *CertManager) watchServerToken(ctx context.Context) {
|
|
// We keep the last iteration's cancel function to reset watches.
|
|
var (
|
|
notifyCtx context.Context
|
|
cancel context.CancelFunc = func() {}
|
|
)
|
|
retryLoopBackoff(ctx, m.waiter, func() error {
|
|
ws := memdb.NewWatchSet()
|
|
ws.Add(m.getStore().AbandonCh())
|
|
|
|
_, token, err := m.getStore().SystemMetadataGet(ws, structs.ServerManagementTokenAccessorID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if token == nil {
|
|
m.logger.Debug("ACLs have not finished initializing")
|
|
return nil
|
|
}
|
|
if token.Value == "" {
|
|
// This should never happen. If the leader stored a token with this key it will not be empty.
|
|
return fmt.Errorf("empty token")
|
|
}
|
|
m.logger.Debug("server management token watch fired - resetting leaf cert watch")
|
|
|
|
// Cancel existing the leaf cert watch and spin up new one any time the server token changes.
|
|
// The watch needs the current token as set by the leader since certificate signing requests go to the leader.
|
|
cancel()
|
|
notifyCtx, cancel = context.WithCancel(ctx)
|
|
|
|
req := cachetype.ConnectCALeafRequest{
|
|
Datacenter: m.config.Datacenter,
|
|
Token: token.Value,
|
|
Server: true,
|
|
}
|
|
if err := m.cache.Notify(notifyCtx, cachetype.ConnectCALeafName, &req, leafWatchID, m.cacheUpdateCh); err != nil {
|
|
return fmt.Errorf("failed to setup leaf cert notifications: %w", err)
|
|
}
|
|
|
|
ws.WatchCtx(ctx)
|
|
return nil
|
|
|
|
}, func(err error) {
|
|
m.logger.Error("failed to watch server management token", "error", err)
|
|
})
|
|
}
|
|
|
|
func (m *CertManager) watchLeafCert(ctx context.Context) error {
|
|
req := cachetype.ConnectCALeafRequest{
|
|
Datacenter: m.config.Datacenter,
|
|
Server: true,
|
|
}
|
|
if err := m.cache.Notify(ctx, cachetype.ConnectCALeafName, &req, leafWatchID, m.cacheUpdateCh); err != nil {
|
|
return fmt.Errorf("failed to setup leaf cert notifications: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (m *CertManager) watchCAConfig(ctx context.Context) {
|
|
retryLoopBackoff(ctx, m.waiter, func() error {
|
|
ws := memdb.NewWatchSet()
|
|
ws.Add(m.getStore().AbandonCh())
|
|
|
|
_, conf, err := m.getStore().CAConfig(ws)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch CA configuration from the state getStore: %w", err)
|
|
}
|
|
if conf == nil || conf.ClusterID == "" {
|
|
m.logger.Debug("CA has not finished initializing")
|
|
return nil
|
|
}
|
|
|
|
id := connect.SpiffeIDSigningForCluster(conf.ClusterID)
|
|
name := connect.PeeringServerSAN(m.config.Datacenter, id.Host())
|
|
|
|
m.logger.Debug("CA config watch fired - updating auto TLS server name", "name", name)
|
|
m.tlsConfigurator.UpdateAutoTLSPeeringServerName(name)
|
|
|
|
ws.WatchCtx(ctx)
|
|
return nil
|
|
|
|
}, func(err error) {
|
|
m.logger.Error("failed to watch CA config", "error", err)
|
|
})
|
|
}
|
|
|
|
func retryLoopBackoff(ctx context.Context, waiter retry.Waiter, loopFn func() error, errorFn func(error)) {
|
|
for {
|
|
if err := waiter.Wait(ctx); err != nil {
|
|
// The error will only be non-nil if the context is canceled.
|
|
return
|
|
}
|
|
|
|
if err := loopFn(); err != nil {
|
|
errorFn(err)
|
|
continue
|
|
}
|
|
|
|
// Reset the failure count seen by the waiter if there was no error.
|
|
waiter.Reset()
|
|
}
|
|
}
|
|
|
|
func (m *CertManager) handleUpdates(ctx context.Context) {
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
m.logger.Debug("context canceled")
|
|
return
|
|
|
|
case event := <-m.cacheUpdateCh:
|
|
m.logger.Debug("got cache update event", "correlationID", event.CorrelationID, "error", event.Err)
|
|
|
|
if err := m.handleLeafUpdate(event); err != nil {
|
|
m.logger.Error("failed to handle cache update event", "error", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (m *CertManager) handleLeafUpdate(event cache.UpdateEvent) error {
|
|
if event.Err != nil {
|
|
return fmt.Errorf("leaf cert watch returned an error: %w", event.Err)
|
|
}
|
|
if event.CorrelationID != leafWatchID {
|
|
return fmt.Errorf("got unexpected update correlation ID %q while expecting %q", event.CorrelationID, leafWatchID)
|
|
}
|
|
|
|
leaf, ok := event.Result.(*structs.IssuedCert)
|
|
if !ok {
|
|
return fmt.Errorf("got invalid type in leaf cert watch response: %T", event.Result)
|
|
}
|
|
|
|
m.logger.Debug("leaf certificate watch fired - updating auto TLS certificate", "uri", leaf.ServerURI)
|
|
|
|
if err := m.tlsConfigurator.UpdateAutoTLSCert(leaf.CertPEM, leaf.PrivateKeyPEM); err != nil {
|
|
return fmt.Errorf("failed to getStore the server leaf cert: %w", err)
|
|
}
|
|
return nil
|
|
}
|