open-consul/agent/consul/servercert/manager.go
freddygv 2c5caec97c Share mgw addrs in peering stream if needed
This commit adds handling so that the replication stream considers
whether the user intends to peer through mesh gateways.

The subscription will return server or mesh gateway addresses depending
on the mesh configuration setting. These watches can be updated at
runtime by modifying the mesh config entry.
2022-10-03 11:42:20 -06:00

267 lines
8.1 KiB
Go

package servercert
import (
"context"
"fmt"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib/retry"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
)
// leafWatchID is the correlation ID attached to the leaf cert cache watches.
// handleLeafUpdate rejects any update event that carries a different ID.
const leafWatchID = "leaf"
// Cache is an interface to represent the necessary methods of the agent/cache.Cache.
// It is used to request and renew the server leaf certificate.
type Cache interface {
// Notify is called by the cert manager to register a watch for request r of
// cache type t. The manager reads the resulting cache.UpdateEvent values from
// ch and expects them to carry correlationID.
// NOTE(review): the manager cancels ctx to tear a watch down; confirm the
// implementation stops sending on ch once ctx is done.
Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error
}
// TLSConfigurator is an interface to represent the necessary methods of the tlsutil.Configurator.
// It is used to apply the server leaf certificate and server name.
type TLSConfigurator interface {
// UpdateAutoTLSCert is given the CertPEM and PrivateKeyPEM of every issued
// certificate delivered by the leaf cert watch.
UpdateAutoTLSCert(pub, priv string) error
// UpdateAutoTLSPeeringServerName is given the peering server SAN that
// watchCAConfig computes from the datacenter and the CA cluster ID.
UpdateAutoTLSPeeringServerName(name string)
}
// Store is an interface to represent the necessary methods of the state.Store.
// It is used to fetch the CA config, from which the peering server name given
// to the TLSConfigurator is derived, and the server management token.
type Store interface {
// CAConfig returns the CA configuration. watchCAConfig treats a nil
// configuration or an empty ClusterID as the CA not being initialized yet.
CAConfig(ws memdb.WatchSet) (uint64, *structs.CAConfiguration, error)
// SystemMetadataGet looks up the system metadata entry stored under key.
// watchServerToken treats a nil entry as ACLs not being initialized yet.
SystemMetadataGet(ws memdb.WatchSet, key string) (uint64, *structs.SystemMetadataEntry, error)
// AbandonCh is added to every watch set built by the cert manager.
// NOTE(review): presumably closed when this store is abandoned/replaced so
// that watchers re-fetch the store - confirm against state.Store.
AbandonCh() <-chan struct{}
}
// Config holds the agent configuration the cert manager needs to operate.
type Config struct {
// Datacenter is the datacenter name the server is configured with.
// It is used in leaf cert requests and when building the peering server name.
Datacenter string
// ACLsEnabled indicates whether the ACL system is enabled on this server.
// When true, the leaf cert watch is driven by the server management token watch.
ACLsEnabled bool
}
// Deps bundles the dependencies handed to NewCertManager.
type Deps struct {
// Config is the agent configuration the cert manager operates with.
Config Config
// Logger receives the cert manager's log output.
Logger hclog.Logger
// Cache is used to set up the leaf certificate watches.
Cache Cache
// GetStore returns the state store that the CA config and system metadata
// are read from. It is invoked on every watch iteration.
GetStore func() Store
// TLSConfigurator receives leaf cert and peering server name updates.
TLSConfigurator TLSConfigurator
// waiter is not read by NewCertManager, which always installs its own
// default backoff settings.
waiter retry.Waiter
}
// CertManager is responsible for requesting and renewing the leaf cert for server agents.
// The server certificate is managed internally and used for peering control-plane traffic
// to the TLS-enabled external gRPC port.
type CertManager struct {
// logger receives the cert manager's log output.
logger hclog.Logger
// config contains agent configuration necessary for the cert manager to operate.
config Config
// cache provides an API to issue internal RPC requests and receive notifications
// when there are changes.
cache Cache
// cacheUpdateCh receives notifications of cache update events for resources watched.
// It is created with a buffer of one event and drained by handleUpdates.
cacheUpdateCh chan cache.UpdateEvent
// getStore returns the server state store for read-only access.
getStore func() Store
// tlsConfigurator receives the leaf cert and peering server name updates from the cert manager.
tlsConfigurator TLSConfigurator
// waiter contains the waiter for exponential backoff between retries.
// It is passed by value to each retry loop, so every loop backs off independently.
waiter retry.Waiter
}
// NewCertManager builds a CertManager from deps.
//
// The update channel is buffered for a single event, and the retry backoff is
// always the built-in default (1s minimum, 5m maximum, 20% jitter); the
// unexported waiter field of deps is not consulted.
func NewCertManager(deps Deps) *CertManager {
	// Default backoff applied between iterations of the watch loops.
	backoff := retry.Waiter{
		MinFailures: 1,
		MinWait:     1 * time.Second,
		MaxWait:     5 * time.Minute,
		Jitter:      retry.NewJitter(20),
	}

	mgr := &CertManager{
		logger:          deps.Logger,
		config:          deps.Config,
		cache:           deps.Cache,
		cacheUpdateCh:   make(chan cache.UpdateEvent, 1),
		getStore:        deps.GetStore,
		tlsConfigurator: deps.TLSConfigurator,
		waiter:          backoff,
	}
	return mgr
}
// Start sets up the certificate watches and then launches the goroutine that
// consumes cache update events. All background work is bound to ctx.
//
// It returns an error only if the watches could not be initialized; in that
// case the update-handling goroutine is not started.
func (m *CertManager) Start(ctx context.Context) error {
	watchErr := m.initializeWatches(ctx)
	if watchErr != nil {
		return fmt.Errorf("failed to set up certificate watches: %w", watchErr)
	}

	// Events produced by the watches are applied asynchronously.
	go m.handleUpdates(ctx)

	m.logger.Info("initialized server certificate management")
	return nil
}
// initializeWatches starts the leaf cert watch and the CA config watch.
//
// With ACLs enabled the leaf cert watch is driven by a background server token
// watch; otherwise a single leaf cert notification is registered synchronously
// and its error, if any, is returned before the CA config watch is started.
func (m *CertManager) initializeWatches(ctx context.Context) error {
	switch {
	case m.config.ACLsEnabled:
		// The leaf cert watch must be set/reset whenever the server token
		// changes, so it is owned by the token watch goroutine.
		go m.watchServerToken(ctx)

	default:
		// Without ACLs a single cache notification for leaf certs suffices.
		if watchErr := m.watchLeafCert(ctx); watchErr != nil {
			return fmt.Errorf("failed to watch leaf: %w", watchErr)
		}
	}

	go m.watchCAConfig(ctx)
	return nil
}
// watchServerToken watches the server management token in system metadata and
// (re)creates the leaf cert cache watch whenever the watch fires. It runs
// until ctx is canceled, backing off between iterations via retryLoopBackoff.
func (m *CertManager) watchServerToken(ctx context.Context) {
	// cancel tears down the leaf cert watch created by the previous successful
	// iteration. It starts as a no-op so the first iteration can call it safely.
	var cancel context.CancelFunc = func() {}

	retryLoopBackoff(ctx, m.waiter, func() error {
		// Resolve the store once per iteration so that the abandon channel and
		// the token lookup are guaranteed to come from the same store instance.
		store := m.getStore()

		ws := memdb.NewWatchSet()
		ws.Add(store.AbandonCh())

		_, token, err := store.SystemMetadataGet(ws, structs.ServerManagementTokenAccessorID)
		if err != nil {
			return err
		}
		if token == nil {
			// Nothing to watch with yet; the loop re-checks after the waiter's delay.
			m.logger.Debug("ACLs have not finished initializing")
			return nil
		}
		if token.Value == "" {
			// This should never happen. If the leader stored a token with this key it will not be empty.
			return fmt.Errorf("empty token")
		}
		m.logger.Debug("server management token watch fired - resetting leaf cert watch")

		// Cancel the existing leaf cert watch and spin up a new one any time the server token changes.
		// The watch needs the current token as set by the leader since certificate signing requests go to the leader.
		cancel()
		var notifyCtx context.Context
		notifyCtx, cancel = context.WithCancel(ctx)

		req := cachetype.ConnectCALeafRequest{
			Datacenter: m.config.Datacenter,
			Token:      token.Value,
			Server:     true,
		}
		if err := m.cache.Notify(notifyCtx, cachetype.ConnectCALeafName, &req, leafWatchID, m.cacheUpdateCh); err != nil {
			return fmt.Errorf("failed to setup leaf cert notifications: %w", err)
		}

		// Block until the token entry changes, the store is abandoned, or ctx
		// is canceled. The result is ignored on purpose: a canceled ctx is
		// detected by the waiter on the next loop iteration.
		ws.WatchCtx(ctx)
		return nil
	}, func(err error) {
		m.logger.Error("failed to watch server management token", "error", err)
	})
}
// watchLeafCert registers a single cache notification for the server leaf
// certificate, without a token. Updates are delivered on m.cacheUpdateCh
// tagged with leafWatchID. It returns an error if the notification could not
// be registered.
func (m *CertManager) watchLeafCert(ctx context.Context) error {
	leafReq := &cachetype.ConnectCALeafRequest{
		Datacenter: m.config.Datacenter,
		Server:     true,
	}

	notifyErr := m.cache.Notify(ctx, cachetype.ConnectCALeafName, leafReq, leafWatchID, m.cacheUpdateCh)
	if notifyErr != nil {
		return fmt.Errorf("failed to setup leaf cert notifications: %w", notifyErr)
	}
	return nil
}
// watchCAConfig watches the CA configuration and pushes the derived peering
// server name to the TLS configurator whenever the watch fires. It runs until
// ctx is canceled, backing off between iterations via retryLoopBackoff.
func (m *CertManager) watchCAConfig(ctx context.Context) {
	retryLoopBackoff(ctx, m.waiter, func() error {
		// Resolve the store once per iteration so that the abandon channel and
		// the CA config lookup are guaranteed to come from the same store instance.
		store := m.getStore()

		ws := memdb.NewWatchSet()
		ws.Add(store.AbandonCh())

		_, conf, err := store.CAConfig(ws)
		if err != nil {
			return fmt.Errorf("failed to fetch CA configuration from the state store: %w", err)
		}
		if conf == nil || conf.ClusterID == "" {
			// Nothing to derive a name from yet; the loop re-checks after the waiter's delay.
			m.logger.Debug("CA has not finished initializing")
			return nil
		}

		// The server name is built from the datacenter and the host part of
		// the cluster's signing SPIFFE ID.
		id := connect.SpiffeIDSigningForCluster(conf.ClusterID)
		name := connect.PeeringServerSAN(m.config.Datacenter, id.Host())

		m.logger.Debug("CA config watch fired - updating auto TLS server name", "name", name)
		m.tlsConfigurator.UpdateAutoTLSPeeringServerName(name)

		// Block until the CA config changes, the store is abandoned, or ctx is
		// canceled. The result is ignored on purpose: a canceled ctx is
		// detected by the waiter on the next loop iteration.
		ws.WatchCtx(ctx)
		return nil
	}, func(err error) {
		m.logger.Error("failed to watch CA config", "error", err)
	})
}
// retryLoopBackoff repeatedly waits on waiter and then invokes loopFn until
// waiter.Wait reports an error.
//
// When loopFn fails, the error is handed to errorFn and the waiter's state is
// left untouched for the next wait; when it succeeds, the waiter is reset.
// waiter is received by value, so its state is private to this loop.
func retryLoopBackoff(ctx context.Context, waiter retry.Waiter, loopFn func() error, errorFn func(error)) {
	// Per the original author's note, Wait only returns a non-nil error when
	// the context is canceled, which ends the loop.
	for waiter.Wait(ctx) == nil {
		loopErr := loopFn()
		if loopErr == nil {
			// Reset the failure count seen by the waiter after a clean pass.
			waiter.Reset()
			continue
		}
		errorFn(loopErr)
	}
}
// handleUpdates consumes cache update events from m.cacheUpdateCh and applies
// each one through handleLeafUpdate, logging any failure. It returns once ctx
// is canceled.
func (m *CertManager) handleUpdates(ctx context.Context) {
	done := ctx.Done()

	for {
		select {
		case update := <-m.cacheUpdateCh:
			m.logger.Debug("got cache update event", "correlationID", update.CorrelationID, "error", update.Err)

			// A failed update is logged and skipped; the loop keeps serving
			// subsequent events.
			applyErr := m.handleLeafUpdate(update)
			if applyErr != nil {
				m.logger.Error("failed to handle cache update event", "error", applyErr)
			}

		case <-done:
			m.logger.Debug("context canceled")
			return
		}
	}
}
// handleLeafUpdate validates a cache update event from the leaf cert watch and
// hands the certificate it carries to the TLS configurator.
//
// It returns an error when the event carries an error, has a correlation ID
// other than leafWatchID, does not hold a non-nil *structs.IssuedCert, or when
// the TLS configurator rejects the certificate.
func (m *CertManager) handleLeafUpdate(event cache.UpdateEvent) error {
	if event.Err != nil {
		return fmt.Errorf("leaf cert watch returned an error: %w", event.Err)
	}
	if event.CorrelationID != leafWatchID {
		return fmt.Errorf("got unexpected update correlation ID %q while expecting %q", event.CorrelationID, leafWatchID)
	}

	leaf, ok := event.Result.(*structs.IssuedCert)
	if !ok {
		return fmt.Errorf("got invalid type in leaf cert watch response: %T", event.Result)
	}
	if leaf == nil {
		// A typed nil pointer satisfies the assertion above; reject it here
		// instead of dereferencing it below.
		return fmt.Errorf("got nil certificate in leaf cert watch response")
	}

	m.logger.Debug("leaf certificate watch fired - updating auto TLS certificate", "uri", leaf.ServerURI)

	if err := m.tlsConfigurator.UpdateAutoTLSCert(leaf.CertPEM, leaf.PrivateKeyPEM); err != nil {
		return fmt.Errorf("failed to store the server leaf cert: %w", err)
	}
	return nil
}