86c9cb037f
Replace two methods with a single one that returns the cert. This moves more of the logic into the single caller (auto-config). tlsutil.Configurator is widely used. By keeping it smaller and focused only on storing and returning TLS config, we make the code easier to follow. These two methods were more related to auto-config than to tlsutil, so reducing the interface moves the logic closer to the feature that requires it.
197 lines
7.2 KiB
Go
197 lines
7.2 KiB
Go
package autoconf
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/agent/cache"
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
)
|
|
|
|
// handleCacheEvent is used to handle event notifications from the cache for the roots
|
|
// or leaf cert watches.
|
|
func (ac *AutoConfig) handleCacheEvent(u cache.UpdateEvent) error {
|
|
switch u.CorrelationID {
|
|
case rootsWatchID:
|
|
ac.logger.Debug("roots watch fired - updating CA certificates")
|
|
if u.Err != nil {
|
|
return fmt.Errorf("root watch returned an error: %w", u.Err)
|
|
}
|
|
|
|
roots, ok := u.Result.(*structs.IndexedCARoots)
|
|
if !ok {
|
|
return fmt.Errorf("invalid type for roots watch response: %T", u.Result)
|
|
}
|
|
|
|
return ac.updateCARoots(roots)
|
|
case leafWatchID:
|
|
ac.logger.Debug("leaf certificate watch fired - updating TLS certificate")
|
|
if u.Err != nil {
|
|
return fmt.Errorf("leaf watch returned an error: %w", u.Err)
|
|
}
|
|
|
|
leaf, ok := u.Result.(*structs.IssuedCert)
|
|
if !ok {
|
|
return fmt.Errorf("invalid type for agent leaf cert watch response: %T", u.Result)
|
|
}
|
|
|
|
return ac.updateLeafCert(leaf)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// handleTokenUpdate is used when a notification about the agent token being updated
|
|
// is received and various watches need cancelling/restarting to use the new token.
|
|
func (ac *AutoConfig) handleTokenUpdate(ctx context.Context) error {
|
|
ac.logger.Debug("Agent token updated - resetting watches")
|
|
|
|
// TODO (autoencrypt) Prepopulate the cache with the new token with
|
|
// the existing cache entry with the old token. The certificate doesn't
|
|
// need to change just because the token has. However there isn't a
|
|
// good way to make that happen and this behavior is benign enough
|
|
// that I am going to push off implementing it.
|
|
|
|
// the agent token has been updated so we must update our leaf cert watch.
|
|
// this cancels the current watches before setting up new ones
|
|
ac.cancelWatches()
|
|
|
|
// recreate the chan for cache updates. This is a precautionary measure to ensure
|
|
// that we don't accidentally get notified for the new watches being setup before
|
|
// a blocking query in the cache returns and sends data to the old chan. In theory
|
|
// the code in agent/cache/watch.go should prevent this where we specifically check
|
|
// for context cancellation prior to sending the event. However we could cancel
|
|
// it after that check and finish setting up the new watches before getting the old
|
|
// events. Both the go routine scheduler and the OS thread scheduler would have to
|
|
// be acting up for this to happen. Regardless the way to ensure we don't get events
|
|
// for the old watches is to simply replace the chan we are expecting them from.
|
|
close(ac.cacheUpdates)
|
|
ac.cacheUpdates = make(chan cache.UpdateEvent, 10)
|
|
|
|
// restart watches - this will be done with the correct token
|
|
cancelWatches, err := ac.setupCertificateCacheWatches(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to restart watches after agent token update: %w", err)
|
|
}
|
|
ac.cancelWatches = cancelWatches
|
|
return nil
|
|
}
|
|
|
|
// handleFallback is used when the current TLS certificate has expired and the normal
|
|
// updating mechanisms have failed to renew it quickly enough. This function will
|
|
// use the configured fallback mechanism to retrieve a new cert and start monitoring
|
|
// that one.
|
|
func (ac *AutoConfig) handleFallback(ctx context.Context) error {
|
|
ac.logger.Warn("agent's client certificate has expired")
|
|
// Background because the context is mainly useful when the agent is first starting up.
|
|
switch {
|
|
case ac.config.AutoConfig.Enabled:
|
|
resp, err := ac.getInitialConfiguration(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("error while retrieving new agent certificates via auto-config: %w", err)
|
|
}
|
|
|
|
return ac.recordInitialConfiguration(resp)
|
|
case ac.config.AutoEncryptTLS:
|
|
reply, err := ac.autoEncryptInitialCerts(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("error while retrieving new agent certificate via auto-encrypt: %w", err)
|
|
}
|
|
return ac.setInitialTLSCertificates(reply)
|
|
default:
|
|
return fmt.Errorf("logic error: either auto-encrypt or auto-config must be enabled")
|
|
}
|
|
}
|
|
|
|
// run is the private method to be spawn by the Start method for
|
|
// executing the main monitoring loop.
|
|
func (ac *AutoConfig) run(ctx context.Context, exit chan struct{}) {
|
|
// The fallbackTimer is used to notify AFTER the agents
|
|
// leaf certificate has expired and where we need
|
|
// to fall back to the less secure RPC endpoint just like
|
|
// if the agent was starting up new.
|
|
//
|
|
// Check 10sec (fallback leeway duration) after cert
|
|
// expires. The agent cache should be handling the expiration
|
|
// and renew it before then.
|
|
//
|
|
// If there is no cert, use a value which immediately triggers the
|
|
// renew, but this case shouldn't happen because at
|
|
// this point, auto_encrypt was just being setup
|
|
// successfully.
|
|
calcFallbackInterval := func() time.Duration {
|
|
cert := ac.acConfig.TLSConfigurator.AutoEncryptCert()
|
|
if cert == nil {
|
|
return -1
|
|
}
|
|
expiry := cert.NotAfter.Add(ac.acConfig.FallbackLeeway)
|
|
return expiry.Sub(time.Now())
|
|
}
|
|
fallbackTimer := time.NewTimer(calcFallbackInterval())
|
|
|
|
// cleanup for once we are stopped
|
|
defer func() {
|
|
// cancel the go routines performing the cache watches
|
|
ac.cancelWatches()
|
|
// ensure we don't leak the timers go routine
|
|
fallbackTimer.Stop()
|
|
// stop receiving notifications for token updates
|
|
ac.acConfig.Tokens.StopNotify(ac.tokenUpdates)
|
|
|
|
ac.logger.Debug("auto-config has been stopped")
|
|
|
|
ac.Lock()
|
|
ac.cancel = nil
|
|
ac.running = false
|
|
// this should be the final cleanup task as its what notifies
|
|
// the rest of the world that this go routine has exited.
|
|
close(exit)
|
|
ac.Unlock()
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
ac.logger.Debug("stopping auto-config")
|
|
return
|
|
case <-ac.tokenUpdates.Ch:
|
|
ac.logger.Debug("handling a token update event")
|
|
|
|
if err := ac.handleTokenUpdate(ctx); err != nil {
|
|
ac.logger.Error("error in handling token update event", "error", err)
|
|
}
|
|
case u := <-ac.cacheUpdates:
|
|
ac.logger.Debug("handling a cache update event", "correlation_id", u.CorrelationID)
|
|
|
|
if err := ac.handleCacheEvent(u); err != nil {
|
|
ac.logger.Error("error in handling cache update event", "error", err)
|
|
}
|
|
|
|
// reset the fallback timer as the certificate may have been updated
|
|
fallbackTimer.Stop()
|
|
fallbackTimer = time.NewTimer(calcFallbackInterval())
|
|
case <-fallbackTimer.C:
|
|
// This is a safety net in case the cert doesn't get renewed
|
|
// in time. The agent would be stuck in that case because the watches
|
|
// never use the AutoEncrypt.Sign endpoint.
|
|
|
|
// check auto encrypt client cert expiration
|
|
cert := ac.acConfig.TLSConfigurator.AutoEncryptCert()
|
|
if cert == nil || cert.NotAfter.Before(time.Now()) {
|
|
if err := ac.handleFallback(ctx); err != nil {
|
|
ac.logger.Error("error when handling a certificate expiry event", "error", err)
|
|
fallbackTimer = time.NewTimer(ac.acConfig.FallbackRetry)
|
|
} else {
|
|
fallbackTimer = time.NewTimer(calcFallbackInterval())
|
|
}
|
|
} else {
|
|
// this shouldn't be possible. We calculate the timer duration to be the certificate
|
|
// expiration time + some leeway (10s default). So whenever we get here the certificate
|
|
// should be expired. Regardless its probably worth resetting the timer.
|
|
fallbackTimer = time.NewTimer(calcFallbackInterval())
|
|
}
|
|
}
|
|
}
|
|
}
|