connect: Add CAManager for synchronizing CA operations

This commit is contained in:
Kyle Havlovitz 2020-11-11 17:05:04 -08:00
parent 0b4876f906
commit 5de81c1375
9 changed files with 875 additions and 795 deletions

View File

@ -135,7 +135,7 @@ func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.Lifeti
go watcher.Start() go watcher.Start()
case <-watcher.RenewCh(): case <-watcher.RenewCh():
v.logger.Error("Successfully renewed token for Vault provider") v.logger.Info("Successfully renewed token for Vault provider")
} }
} }
} }

View File

@ -165,18 +165,13 @@ func (s *ConnectCA) ConfigurationSet(
// If this is a secondary, check if the intermediate needs to be regenerated. // If this is a secondary, check if the intermediate needs to be regenerated.
if s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter { if s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter {
// Get the current root certs from the primary DC. // Attempt to take the CA lock in order to update the config.
var roots structs.IndexedCARoots if err := s.srv.caManager.setState(CAStateReconfig); err != nil {
rootArgs := structs.DCSpecificRequest{ return err
Datacenter: s.srv.config.PrimaryDatacenter,
}
if err := s.srv.forwardDC("ConnectCA.Roots", s.srv.config.PrimaryDatacenter, &rootArgs, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
} }
defer s.srv.caManager.setReady()
s.srv.caProviderReconfigurationLock.Lock() if err := s.srv.caManager.initializeSecondaryCA(newProvider, args.Config); err != nil {
defer s.srv.caProviderReconfigurationLock.Unlock()
if err := s.srv.initializeSecondaryCA(newProvider, roots, args.Config); err != nil {
return fmt.Errorf("Error updating secondary datacenter CA config: %v", err) return fmt.Errorf("Error updating secondary datacenter CA config: %v", err)
} }
cleanupNewProvider = false cleanupNewProvider = false
@ -226,7 +221,7 @@ func (s *ConnectCA) ConfigurationSet(
// If the config has been committed, update the local provider instance // If the config has been committed, update the local provider instance
cleanupNewProvider = false cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot) s.srv.caManager.setCAProvider(newProvider, newActiveRoot)
s.logger.Info("CA provider config updated") s.logger.Info("CA provider config updated")
@ -239,7 +234,7 @@ func (s *ConnectCA) ConfigurationSet(
// First up, sanity check that the current provider actually supports // First up, sanity check that the current provider actually supports
// cross-signing. // cross-signing.
oldProvider, _ := s.srv.getCAProvider() oldProvider, _ := s.srv.caManager.getCAProvider()
if oldProvider == nil { if oldProvider == nil {
return fmt.Errorf("internal error: CA provider is nil") return fmt.Errorf("internal error: CA provider is nil")
} }
@ -323,7 +318,7 @@ func (s *ConnectCA) ConfigurationSet(
// If the config has been committed, update the local provider instance // If the config has been committed, update the local provider instance
// and call teardown on the old provider // and call teardown on the old provider
cleanupNewProvider = false cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot) s.srv.caManager.setCAProvider(newProvider, newActiveRoot)
if err := oldProvider.Cleanup(); err != nil { if err := oldProvider.Cleanup(); err != nil {
s.logger.Warn("failed to clean up old provider", "provider", config.Provider) s.logger.Warn("failed to clean up old provider", "provider", config.Provider)
@ -457,7 +452,7 @@ func (s *ConnectCA) SignIntermediate(
return acl.ErrPermissionDenied return acl.ErrPermissionDenied
} }
provider, _ := s.srv.getCAProvider() provider, _ := s.srv.caManager.getCAProvider()
if provider == nil { if provider == nil {
return fmt.Errorf("internal error: CA provider is nil") return fmt.Errorf("internal error: CA provider is nil")
} }

View File

@ -328,7 +328,7 @@ func (s *Server) establishLeadership(ctx context.Context) error {
s.autopilot.Start(ctx) s.autopilot.Start(ctx)
// todo(kyhavlov): start a goroutine here for handling periodic CA rotation // todo(kyhavlov): start a goroutine here for handling periodic CA rotation
if err := s.initializeCA(); err != nil { if err := s.caManager.initializeCA(); err != nil {
return err return err
} }
@ -375,7 +375,7 @@ func (s *Server) revokeLeadership() {
s.stopConnectLeader() s.stopConnectLeader()
s.setCAProvider(nil, nil) s.caManager.setCAProvider(nil, nil)
s.stopACLTokenReaping() s.stopACLTokenReaping()

View File

@ -3,17 +3,13 @@ package consul
import ( import (
"context" "context"
"fmt" "fmt"
"reflect"
"strings"
"time" "time"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging"
uuid "github.com/hashicorp/go-uuid"
) )
const ( const (
@ -32,76 +28,36 @@ var (
maxRetryBackoff = 256 maxRetryBackoff = 256
) )
// initializeCAConfig is used to initialize the CA config if necessary // startConnectLeader starts multi-dc connect leader routines.
// when setting up the CA during establishLeadership func (s *Server) startConnectLeader() error {
func (s *Server) initializeCAConfig() (*structs.CAConfiguration, error) { if !s.config.ConnectEnabled {
state := s.fsm.State() return nil
_, config, err := state.CAConfig(nil)
if err != nil {
return nil, err
}
if config == nil {
config = s.config.CAConfig
if config.ClusterID == "" {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
config.ClusterID = id
}
} else if _, ok := config.Config["IntermediateCertTTL"]; !ok {
dup := *config
copied := make(map[string]interface{})
for k, v := range dup.Config {
copied[k] = v
}
copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String()
dup.Config = copied
config = &dup
} else {
return config, nil
} }
req := structs.CARequest{ // Start the Connect secondary DC actions if enabled.
Op: structs.CAOpSetConfig, if s.config.Datacenter != s.config.PrimaryDatacenter {
Config: config, s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.caManager.secondaryCARootWatch)
}
if resp, err := s.raftApply(structs.ConnectCARequestType, req); err != nil {
return nil, err
} else if respErr, ok := resp.(error); ok {
return nil, respErr
} }
return config, nil s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.caManager.intermediateCertRenewalWatch)
s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning)
return s.startIntentionConfigEntryMigration()
} }
// parseCARoot returns a filled-in structs.CARoot from a raw PEM value. // stopConnectLeader stops connect specific leader functions.
func parseCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) { func (s *Server) stopConnectLeader() {
id, err := connect.CalculateCertFingerprint(pemValue) s.leaderRoutineManager.Stop(intentionMigrationRoutineName)
if err != nil { s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
return nil, fmt.Errorf("error parsing root fingerprint: %v", err) s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
provider, _ := s.caManager.getCAProvider()
if provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
} }
rootCert, err := connect.ParseCert(pemValue)
if err != nil {
return nil, fmt.Errorf("error parsing root cert: %v", err)
} }
keyType, keyBits, err := connect.KeyInfoFromCert(rootCert)
if err != nil {
return nil, fmt.Errorf("error extracting root key info: %v", err)
}
return &structs.CARoot{
ID: id,
Name: fmt.Sprintf("%s CA Root Cert", strings.Title(provider)),
SerialNumber: rootCert.SerialNumber.Uint64(),
SigningKeyID: connect.EncodeSigningKeyID(rootCert.SubjectKeyId),
ExternalTrustDomain: clusterID,
NotBefore: rootCert.NotBefore,
NotAfter: rootCert.NotAfter,
RootCert: pemValue,
PrivateKeyType: keyType,
PrivateKeyBits: keyBits,
Active: true,
}, nil
} }
// createProvider returns a connect CA provider from the given config. // createProvider returns a connect CA provider from the given config.
@ -126,505 +82,6 @@ func (s *Server) createCAProvider(conf *structs.CAConfiguration) (ca.Provider, e
return p, nil return p, nil
} }
// getCAProvider is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getCAProvider() (ca.Provider, *structs.CARoot) {
retries := 0
var result ca.Provider
var resultRoot *structs.CARoot
for result == nil {
s.caProviderLock.RLock()
result = s.caProvider
resultRoot = s.caProviderRoot
s.caProviderLock.RUnlock()
// In cases where an agent is started with managed proxies, we may ask
// for the provider before establishLeadership completes. If we're the
// leader, then wait and get the provider again
if result == nil && s.IsLeader() && retries < 10 {
retries++
time.Sleep(50 * time.Millisecond)
continue
}
break
}
return result, resultRoot
}
// setCAProvider is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
s.caProviderLock.Lock()
defer s.caProviderLock.Unlock()
s.caProvider = newProvider
s.caProviderRoot = root
}
// initializeCA sets up the CA provider when gaining leadership, either bootstrapping
// the CA if this is the primary DC or making a remote RPC for intermediate signing
// if this is a secondary DC.
func (s *Server) initializeCA() error {
connectLogger := s.loggers.Named(logging.Connect)
// Bail if connect isn't enabled.
if !s.config.ConnectEnabled {
return nil
}
// Initialize the provider based on the current config.
conf, err := s.initializeCAConfig()
if err != nil {
return err
}
provider, err := s.createCAProvider(conf)
if err != nil {
return err
}
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
s.setCAProvider(provider, nil)
// If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0
if s.config.PrimaryDatacenter != s.config.Datacenter {
versionOk, foundPrimary := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !foundPrimary {
connectLogger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA")
// return nil because we will initialize the secondary CA later
return nil
} else if !versionOk {
// return nil because we will initialize the secondary CA later
connectLogger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA",
"min_version", minMultiDCConnectVersion.String(),
)
return nil
}
// Get the root CA to see if we need to refresh our intermediate.
args := structs.DCSpecificRequest{
Datacenter: s.config.PrimaryDatacenter,
}
var roots structs.IndexedCARoots
if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil {
return err
}
// Configure the CA provider and initialize the intermediate certificate if necessary.
if err := s.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := s.initializeSecondaryCA(provider, roots, nil); err != nil {
return err
}
connectLogger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider)
return nil
}
return s.initializeRootCA(provider, conf)
}
// initializeRootCA runs the initialization logic for a root CA.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error {
connectLogger := s.loggers.Named(logging.Connect)
pCfg := ca.ProviderConfig{
ClusterID: conf.ClusterID,
Datacenter: s.config.Datacenter,
IsPrimary: true,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := provider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
// Get the active root cert from the CA
rootPEM, err := provider.ActiveRoot()
if err != nil {
return fmt.Errorf("error getting root cert: %v", err)
}
rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
// Also create the intermediate CA, which is the one that actually signs leaf certs
interPEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating intermediate cert: %v", err)
}
_, err = connect.ParseCert(interPEM)
if err != nil {
return fmt.Errorf("error getting intermediate cert: %v", err)
}
// If the provider has state to persist and it's changed or new then update
// CAConfig.
pState, err := provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
if !reflect.DeepEqual(conf.State, pState) {
// Update the CAConfig in raft to persist the provider state
conf.State = pState
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: conf,
}
if _, err = s.raftApply(structs.ConnectCARequestType, req); err != nil {
return fmt.Errorf("error persisting provider state: %v", err)
}
}
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot != nil {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}
rootCA.IntermediateCerts = activeRoot.IntermediateCerts
s.setCAProvider(provider, rootCA)
return nil
}
// Get the highest index
idx, _, err := state.CARoots(nil)
if err != nil {
return err
}
// Store the root cert in raft
resp, err := s.raftApply(structs.ConnectCARequestType, &structs.CARequest{
Op: structs.CAOpSetRoots,
Index: idx,
Roots: []*structs.CARoot{rootCA},
})
if err != nil {
connectLogger.Error("Raft apply failed", "error", err)
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
s.setCAProvider(provider, rootCA)
connectLogger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting
// it signed by the primary DC if the root CA of the primary DC has changed since the last
// intermediate.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) initializeSecondaryCA(provider ca.Provider, primaryRoots structs.IndexedCARoots, config *structs.CAConfiguration) error {
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
var (
storedRootID string
expectedSigningKeyID string
currentSigningKeyID string
activeSecondaryRoot *structs.CARoot
)
if activeIntermediate != "" {
// In the event that we already have an intermediate, we must have
// already replicated some primary root information locally, so check
// to see if we're up to date by fetching the rootID and the
// signingKeyID used in the secondary.
//
// Note that for the same rootID the primary representation of the root
// will have a different SigningKeyID field than the secondary
// representation of the same root. This is because it's derived from
// the intermediate which is different in all datacenters.
storedRoot, err := provider.ActiveRoot()
if err != nil {
return err
}
storedRootID, err = connect.CalculateCertFingerprint(storedRoot)
if err != nil {
return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot)
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
// This will fetch the secondary's exact current representation of the
// active root. Note that this data should only be used if the IDs
// match, otherwise it's out of date and should be regenerated.
_, activeSecondaryRoot, err = s.fsm.State().CARootActive(nil)
if err != nil {
return err
}
if activeSecondaryRoot != nil {
currentSigningKeyID = activeSecondaryRoot.SigningKeyID
}
}
// Determine which of the provided PRIMARY representations of roots is the
// active one. We'll use this as a template to generate any new root
// representations meant for this secondary.
var newActiveRoot *structs.CARoot
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
newActiveRoot = root
break
}
}
if newActiveRoot == nil {
return fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := false
if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
newIntermediate := false
if needsNewIntermediate {
if err := s.getIntermediateCASigned(provider, newActiveRoot); err != nil {
return err
}
newIntermediate = true
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeSecondaryRoot
}
// Update the roots list in the state store if there's a new active root.
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
// Determine whether a root update is needed, and persist the roots/config accordingly.
var newRoot *structs.CARoot
if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate {
newRoot = newActiveRoot
}
if err := s.persistNewRootAndConfig(provider, newRoot, config); err != nil {
return err
}
s.setCAProvider(provider, newActiveRoot)
return nil
}
// persistNewRootAndConfig is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
// If newActiveRoot is non-nil, it will be appended to the current roots list.
// If config is non-nil, it will be used to overwrite the existing config.
func (s *Server) persistNewRootAndConfig(provider ca.Provider, newActiveRoot *structs.CARoot, config *structs.CAConfiguration) error {
connectLogger := s.loggers.Named(logging.Connect)
state := s.fsm.State()
idx, oldRoots, err := state.CARoots(nil)
if err != nil {
return err
}
var newConf structs.CAConfiguration
_, storedConfig, err := state.CAConfig(nil)
if err != nil {
return err
}
if storedConfig == nil {
return fmt.Errorf("local CA not initialized yet")
}
if config != nil {
newConf = *config
} else {
newConf = *storedConfig
}
newConf.ModifyIndex = storedConfig.ModifyIndex
if newActiveRoot != nil {
newConf.ClusterID = newActiveRoot.ExternalTrustDomain
}
// Persist any state the provider needs us to
newConf.State, err = provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
// If there's a new active root, copy the root list and append it, updating
// the old root with the time it was rotated out.
var newRoots structs.CARoots
if newActiveRoot != nil {
for _, r := range oldRoots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
}
if newRoot.ExternalTrustDomain == "" {
newRoot.ExternalTrustDomain = config.ClusterID
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
} else {
newRoots = oldRoots
}
args := &structs.CARequest{
Op: structs.CAOpSetRootsAndConfig,
Index: idx,
Roots: newRoots,
Config: &newConf,
}
resp, err := s.raftApply(structs.ConnectCARequestType, &args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
connectLogger.Info("updated root certificates from primary datacenter")
return nil
}
// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter.
// This is only run for CAs that require an intermediary in the primary DC, such as Vault.
// This function is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error {
connectLogger := s.loggers.Named(logging.Connect)
// Generate and sign an intermediate cert using the root CA.
intermediatePEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating new intermediate cert: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
connectLogger.Info("generated new intermediate certificate for primary datacenter")
return nil
}
// getIntermediateCASigned is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error {
connectLogger := s.loggers.Named(logging.Connect)
csr, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
}
var intermediatePEM string
if err := s.forwardDC("ConnectCA.SignIntermediate", s.config.PrimaryDatacenter, s.generateCASignRequest(csr), &intermediatePEM); err != nil {
// this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership
connectLogger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err)
return nil
}
if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil {
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
connectLogger.Info("received new intermediate certificate from primary datacenter")
return nil
}
func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: s.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()},
}
}
// startConnectLeader starts multi-dc connect leader routines.
func (s *Server) startConnectLeader() error {
if !s.config.ConnectEnabled {
return nil
}
// Start the Connect secondary DC actions if enabled.
if s.config.Datacenter != s.config.PrimaryDatacenter {
s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.secondaryCARootWatch)
}
s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.intermediateCertRenewalWatch)
s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning)
return s.startIntentionConfigEntryMigration()
}
// stopConnectLeader stops connect specific leader functions.
func (s *Server) stopConnectLeader() {
s.leaderRoutineManager.Stop(intentionMigrationRoutineName)
s.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
s.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
s.leaderRoutineManager.Stop(caRootPruningRoutineName)
// If the provider implements NeedsStop, we call Stop to perform any shutdown actions.
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
provider, _ := s.getCAProvider()
if provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
}
}
}
func (s *Server) runCARootPruning(ctx context.Context) error { func (s *Server) runCARootPruning(ctx context.Context) error {
ticker := time.NewTicker(caRootPruneInterval) ticker := time.NewTicker(caRootPruneInterval)
defer ticker.Stop() defer ticker.Stop()
@ -694,155 +151,6 @@ func (s *Server) pruneCARoots() error {
return nil return nil
} }
// intermediateCertRenewalWatch checks the intermediate cert for
// expiration. As soon as more than half the time a cert is valid has passed,
// it will try to renew it.
func (s *Server) intermediateCertRenewalWatch(ctx context.Context) error {
connectLogger := s.loggers.Named(logging.Connect)
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
for {
select {
case <-ctx.Done():
return nil
case <-time.After(structs.IntermediateCertRenewInterval):
retryLoopBackoffAbortOnSuccess(ctx, func() error {
s.caProviderReconfigurationLock.Lock()
defer s.caProviderReconfigurationLock.Unlock()
provider, _ := s.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// If this isn't the primary, make sure the CA has been initialized.
if !isPrimary && !s.configuredSecondaryCA() {
return fmt.Errorf("secondary CA is not yet configured.")
}
state := s.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
// If this is the primary, check if this is a provider that uses an intermediate cert. If
// it isn't, we don't need to check for a renewal.
if isPrimary {
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok {
return nil
}
}
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
if activeIntermediate == "" {
return fmt.Errorf("datacenter doesn't have an active intermediate.")
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer),
intermediateCert.NotAfter) {
return nil
}
renewalFunc := s.getIntermediateCAPrimary
if !isPrimary {
renewalFunc = s.getIntermediateCASigned
}
if err := renewalFunc(provider, activeRoot); err != nil {
return err
}
if err := s.persistNewRootAndConfig(provider, activeRoot, nil); err != nil {
return err
}
s.setCAProvider(provider, activeRoot)
return nil
}, func(err error) {
connectLogger.Error("error renewing intermediate certs",
"routine", intermediateCertRenewWatchRoutineName,
"error", err,
)
})
}
}
}
// secondaryCARootWatch maintains a blocking query to the primary datacenter's
// ConnectCA.Roots endpoint to monitor when it needs to request a new signed
// intermediate certificate.
func (s *Server) secondaryCARootWatch(ctx context.Context) error {
connectLogger := s.loggers.Named(logging.Connect)
args := structs.DCSpecificRequest{
Datacenter: s.config.PrimaryDatacenter,
QueryOptions: structs.QueryOptions{
// the maximum time the primary roots watch query can block before returning
MaxQueryTime: s.config.MaxQueryTime,
},
}
connectLogger.Debug("starting Connect CA root replication from primary datacenter", "primary", s.config.PrimaryDatacenter)
retryLoopBackoff(ctx, func() error {
var roots structs.IndexedCARoots
if err := s.forwardDC("ConnectCA.Roots", s.config.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
}
// Check to see if the primary has been upgraded in case we're waiting to switch to
// secondary mode.
provider, _ := s.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
if !s.configuredSecondaryCA() {
versionOk, primaryFound := ServersInDCMeetMinimumVersion(s, s.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !primaryFound {
return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization")
}
if versionOk {
if err := s.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize secondary CA provider: %v", err)
}
}
}
// Run the secondary CA init routine to see if we need to request a new
// intermediate.
if s.configuredSecondaryCA() {
if err := s.initializeSecondaryCA(provider, roots, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
}
args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index)
return nil
}, func(err error) {
connectLogger.Error("CA root replication failed, will retry",
"routine", secondaryCARootWatchRoutineName,
"error", err,
)
})
return nil
}
// retryLoopBackoff loops a given function indefinitely, backing off exponentially // retryLoopBackoff loops a given function indefinitely, backing off exponentially
// upon errors up to a maximum of maxRetryBackoff seconds. // upon errors up to a maximum of maxRetryBackoff seconds.
func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error)) { func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error)) {
@ -898,46 +206,6 @@ func nextIndexVal(prevIdx, idx uint64) uint64 {
return idx return idx
} }
// initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter.
// It is being called while holding caProviderReconfigurationLock which means
// it must never take that lock itself or call anything that does.
func (s *Server) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error {
if roots.TrustDomain == "" {
return fmt.Errorf("trust domain from primary datacenter is not initialized")
}
clusterID := strings.Split(roots.TrustDomain, ".")[0]
_, conf, err := s.fsm.State().CAConfig(nil)
if err != nil {
return err
}
pCfg := ca.ProviderConfig{
ClusterID: clusterID,
Datacenter: s.config.Datacenter,
IsPrimary: false,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
s.actingSecondaryLock.Lock()
s.actingSecondaryCA = true
s.actingSecondaryLock.Unlock()
return nil
}
// configuredSecondaryCA is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (s *Server) configuredSecondaryCA() bool {
s.actingSecondaryLock.RLock()
defer s.actingSecondaryLock.RUnlock()
return s.actingSecondaryCA
}
// halfTime returns a duration that is half the time between notBefore and // halfTime returns a duration that is half the time between notBefore and
// notAfter. // notAfter.
func halfTime(notBefore, notAfter time.Time) time.Duration { func halfTime(notBefore, notAfter time.Time) time.Duration {
@ -953,3 +221,11 @@ func lessThanHalfTimePassed(now, notBefore, notAfter time.Time) bool {
t := notBefore.Add(halfTime(notBefore, notAfter)) t := notBefore.Add(halfTime(notBefore, notAfter))
return t.Sub(now) > 0 return t.Sub(now) > 0
} }
func (s *Server) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: s.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: s.tokens.ReplicationToken()},
}
}

View File

@ -0,0 +1,821 @@
package consul
import (
"context"
"fmt"
"reflect"
"strings"
"sync"
"time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog"
uuid "github.com/hashicorp/go-uuid"
)
type CAState string
const (
CAStateUninitialized CAState = "UNINITIALIZED"
CAStateInitializing = "INITIALIZING"
CAStateReady = "READY"
CAStateRenewIntermediate = "RENEWING"
CAStateReconfig = "RECONFIGURING"
)
// CAManager is a wrapper around CA operations such as updating roots, an intermediate
// or the configuration. All operations should go through the CAManager in order to
// avoid data races.
type CAManager struct {
srv *Server
logger hclog.Logger
// provider is the current CA provider in use for Connect. This is
// only non-nil when we are the leader.
provider ca.Provider
// providerRoot is the CARoot that was stored along with the ca.Provider
// active. It's only updated in lock-step with the provider. This prevents
// races between state updates to active roots and the fetch of the provider
// instance.
providerRoot *structs.CARoot
providerLock sync.RWMutex
// primaryRoots is the most recently seen state of the root CAs from the primary datacenter.
// This is protected by the stateLock and updated by initializeCA and the root CA watch routine.
primaryRoots structs.IndexedCARoots
// actingSecondaryCA is whether this datacenter has been initialized as a secondary CA.
actingSecondaryCA bool
state CAState
stateLock sync.RWMutex
}
func NewCAManager(srv *Server) *CAManager {
return &CAManager{
srv: srv,
logger: srv.loggers.Named(logging.Connect),
state: CAStateUninitialized,
}
}
// setState attempts to update the CA state to the given state.
// If the current state is not READY, this will fail. The only exception is when
// the current state is UNINITIALIZED, and the function is called with CAStateInitializing.
func (c *CAManager) setState(newState CAState) error {
c.stateLock.RLock()
state := c.state
c.stateLock.RUnlock()
if state == CAStateReady || (state == CAStateUninitialized && newState == CAStateInitializing) {
c.stateLock.Lock()
c.state = newState
c.stateLock.Unlock()
} else {
return fmt.Errorf("CA is already in %s state", state)
}
return nil
}
// setReady sets the CA state back to READY. This should only be called by a function
// that has successfully called setState beforehand.
func (c *CAManager) setReady() {
c.stateLock.Lock()
c.state = CAStateReady
c.stateLock.Unlock()
}
// initializeCAConfig is used to initialize the CA config if necessary
// when setting up the CA during establishLeadership
func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) {
state := c.srv.fsm.State()
_, config, err := state.CAConfig(nil)
if err != nil {
return nil, err
}
if config == nil {
config = c.srv.config.CAConfig
if config.ClusterID == "" {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
config.ClusterID = id
}
} else if _, ok := config.Config["IntermediateCertTTL"]; !ok {
dup := *config
copied := make(map[string]interface{})
for k, v := range dup.Config {
copied[k] = v
}
copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String()
dup.Config = copied
config = &dup
} else {
return config, nil
}
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: config,
}
if resp, err := c.srv.raftApply(structs.ConnectCARequestType, req); err != nil {
return nil, err
} else if respErr, ok := resp.(error); ok {
return nil, respErr
}
return config, nil
}
// parseCARoot returns a filled-in structs.CARoot from a raw PEM value.
func parseCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) {
id, err := connect.CalculateCertFingerprint(pemValue)
if err != nil {
return nil, fmt.Errorf("error parsing root fingerprint: %v", err)
}
rootCert, err := connect.ParseCert(pemValue)
if err != nil {
return nil, fmt.Errorf("error parsing root cert: %v", err)
}
keyType, keyBits, err := connect.KeyInfoFromCert(rootCert)
if err != nil {
return nil, fmt.Errorf("error extracting root key info: %v", err)
}
return &structs.CARoot{
ID: id,
Name: fmt.Sprintf("%s CA Root Cert", strings.Title(provider)),
SerialNumber: rootCert.SerialNumber.Uint64(),
SigningKeyID: connect.EncodeSigningKeyID(rootCert.SubjectKeyId),
ExternalTrustDomain: clusterID,
NotBefore: rootCert.NotBefore,
NotAfter: rootCert.NotAfter,
RootCert: pemValue,
PrivateKeyType: keyType,
PrivateKeyBits: keyBits,
Active: true,
}, nil
}
// getCAProvider is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) getCAProvider() (ca.Provider, *structs.CARoot) {
retries := 0
var result ca.Provider
var resultRoot *structs.CARoot
for result == nil {
c.providerLock.RLock()
result = c.provider
resultRoot = c.providerRoot
c.providerLock.RUnlock()
// In cases where an agent is started with managed proxies, we may ask
// for the provider before establishLeadership completes. If we're the
// leader, then wait and get the provider again
if result == nil && c.srv.IsLeader() && retries < 10 {
retries++
time.Sleep(50 * time.Millisecond)
continue
}
break
}
return result, resultRoot
}
// setCAProvider is being called while holding the stateLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
c.providerLock.Lock()
c.provider = newProvider
c.providerRoot = root
c.providerLock.Unlock()
}
// initializeCA sets up the CA provider when gaining leadership, either bootstrapping
// the CA if this is the primary DC or making a remote RPC for intermediate signing
// if this is a secondary DC.
func (c *CAManager) initializeCA() error {
// Bail if connect isn't enabled.
if !c.srv.config.ConnectEnabled {
return nil
}
err := c.setState(CAStateInitializing)
if err != nil {
return err
}
defer c.setReady()
// Initialize the provider based on the current config.
conf, err := c.initializeCAConfig()
if err != nil {
return err
}
provider, err := c.srv.createCAProvider(conf)
if err != nil {
return err
}
c.setCAProvider(provider, nil)
// If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0
if c.srv.config.PrimaryDatacenter != c.srv.config.Datacenter {
versionOk, foundPrimary := ServersInDCMeetMinimumVersion(c.srv, c.srv.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !foundPrimary {
c.logger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA")
// return nil because we will initialize the secondary CA later
return nil
} else if !versionOk {
// return nil because we will initialize the secondary CA later
c.logger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA",
"min_version", minMultiDCConnectVersion.String(),
)
return nil
}
// Get the root CA to see if we need to refresh our intermediate.
args := structs.DCSpecificRequest{
Datacenter: c.srv.config.PrimaryDatacenter,
}
var roots structs.IndexedCARoots
if err := c.srv.forwardDC("ConnectCA.Roots", c.srv.config.PrimaryDatacenter, &args, &roots); err != nil {
return err
}
c.primaryRoots = roots
// Configure the CA provider and initialize the intermediate certificate if necessary.
if err := c.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := c.initializeSecondaryCA(provider, nil); err != nil {
return err
}
c.logger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider)
return nil
}
return c.initializeRootCA(provider, conf)
}
// initializeRootCA runs the initialization logic for a root CA.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfiguration) error {
pCfg := ca.ProviderConfig{
ClusterID: conf.ClusterID,
Datacenter: c.srv.config.Datacenter,
IsPrimary: true,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := provider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
// Get the active root cert from the CA
rootPEM, err := provider.ActiveRoot()
if err != nil {
return fmt.Errorf("error getting root cert: %v", err)
}
rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
// Also create the intermediate CA, which is the one that actually signs leaf certs
interPEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating intermediate cert: %v", err)
}
_, err = connect.ParseCert(interPEM)
if err != nil {
return fmt.Errorf("error getting intermediate cert: %v", err)
}
// If the provider has state to persist and it's changed or new then update
// CAConfig.
pState, err := provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
if !reflect.DeepEqual(conf.State, pState) {
// Update the CAConfig in raft to persist the provider state
conf.State = pState
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: conf,
}
if _, err = c.srv.raftApply(structs.ConnectCARequestType, req); err != nil {
return fmt.Errorf("error persisting provider state: %v", err)
}
}
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := c.srv.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot != nil {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}
rootCA.IntermediateCerts = activeRoot.IntermediateCerts
c.setCAProvider(provider, rootCA)
return nil
}
// Get the highest index
idx, _, err := state.CARoots(nil)
if err != nil {
return err
}
// Store the root cert in raft
resp, err := c.srv.raftApply(structs.ConnectCARequestType, &structs.CARequest{
Op: structs.CAOpSetRoots,
Index: idx,
Roots: []*structs.CARoot{rootCA},
})
if err != nil {
c.logger.Error("Raft apply failed", "error", err)
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
c.setCAProvider(provider, rootCA)
c.logger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// initializeSecondaryCA runs the routine for generating an intermediate CA CSR and getting
// it signed by the primary DC if the root CA of the primary DC has changed since the last
// intermediate.
// It is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) initializeSecondaryCA(provider ca.Provider, config *structs.CAConfiguration) error {
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
var (
storedRootID string
expectedSigningKeyID string
currentSigningKeyID string
activeSecondaryRoot *structs.CARoot
)
if activeIntermediate != "" {
// In the event that we already have an intermediate, we must have
// already replicated some primary root information locally, so check
// to see if we're up to date by fetching the rootID and the
// signingKeyID used in the secondary.
//
// Note that for the same rootID the primary representation of the root
// will have a different SigningKeyID field than the secondary
// representation of the same root. This is because it's derived from
// the intermediate which is different in all datacenters.
storedRoot, err := provider.ActiveRoot()
if err != nil {
return err
}
storedRootID, err = connect.CalculateCertFingerprint(storedRoot)
if err != nil {
return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot)
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
// This will fetch the secondary's exact current representation of the
// active root. Note that this data should only be used if the IDs
// match, otherwise it's out of date and should be regenerated.
_, activeSecondaryRoot, err = c.srv.fsm.State().CARootActive(nil)
if err != nil {
return err
}
if activeSecondaryRoot != nil {
currentSigningKeyID = activeSecondaryRoot.SigningKeyID
}
}
// Determine which of the provided PRIMARY representations of roots is the
// active one. We'll use this as a template to generate any new root
// representations meant for this secondary.
var newActiveRoot *structs.CARoot
for _, root := range c.primaryRoots.Roots {
if root.ID == c.primaryRoots.ActiveRootID && root.Active {
newActiveRoot = root
break
}
}
if newActiveRoot == nil {
return fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := false
if activeIntermediate == "" || storedRootID != c.primaryRoots.ActiveRootID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
newIntermediate := false
if needsNewIntermediate {
if err := c.getIntermediateCASigned(provider, newActiveRoot); err != nil {
return err
}
newIntermediate = true
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeSecondaryRoot
}
// Update the roots list in the state store if there's a new active root.
state := c.srv.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
// Determine whether a root update is needed, and persist the roots/config accordingly.
var newRoot *structs.CARoot
if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate {
newRoot = newActiveRoot
}
if err := c.persistNewRootAndConfig(provider, newRoot, config); err != nil {
return err
}
c.setCAProvider(provider, newActiveRoot)
return nil
}
// persistNewRootAndConfig is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
// If newActiveRoot is non-nil, it will be appended to the current roots list.
// If config is non-nil, it will be used to overwrite the existing config.
func (c *CAManager) persistNewRootAndConfig(provider ca.Provider, newActiveRoot *structs.CARoot, config *structs.CAConfiguration) error {
state := c.srv.fsm.State()
idx, oldRoots, err := state.CARoots(nil)
if err != nil {
return err
}
var newConf structs.CAConfiguration
_, storedConfig, err := state.CAConfig(nil)
if err != nil {
return err
}
if storedConfig == nil {
return fmt.Errorf("local CA not initialized yet")
}
if config != nil {
newConf = *config
} else {
newConf = *storedConfig
}
newConf.ModifyIndex = storedConfig.ModifyIndex
if newActiveRoot != nil {
newConf.ClusterID = newActiveRoot.ExternalTrustDomain
}
// Persist any state the provider needs us to
newConf.State, err = provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
// If there's a new active root, copy the root list and append it, updating
// the old root with the time it was rotated out.
var newRoots structs.CARoots
if newActiveRoot != nil {
for _, r := range oldRoots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
}
if newRoot.ExternalTrustDomain == "" {
newRoot.ExternalTrustDomain = config.ClusterID
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
} else {
newRoots = oldRoots
}
args := &structs.CARequest{
Op: structs.CAOpSetRootsAndConfig,
Index: idx,
Roots: newRoots,
Config: &newConf,
}
resp, err := c.srv.raftApply(structs.ConnectCARequestType, &args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
c.logger.Info("updated root certificates from primary datacenter")
return nil
}
// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter.
// This is only run for CAs that require an intermediary in the primary DC, such as Vault.
// This function is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error {
// Generate and sign an intermediate cert using the root CA.
intermediatePEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating new intermediate cert: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
c.logger.Info("generated new intermediate certificate for primary datacenter")
return nil
}
// getIntermediateCASigned is being called while holding caProviderReconfigurationLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error {
csr, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
}
var intermediatePEM string
if err := c.srv.forwardDC("ConnectCA.SignIntermediate", c.srv.config.PrimaryDatacenter, c.srv.generateCASignRequest(csr), &intermediatePEM); err != nil {
// this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership
c.logger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err)
return nil
}
if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert); err != nil {
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
intermediateCert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing intermediate cert: %v", err)
}
// Append the new intermediate to our local active root entry. This is
// where the root representations start to diverge.
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM)
newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
c.logger.Info("received new intermediate certificate from primary datacenter")
return nil
}
// intermediateCertRenewalWatch checks the intermediate cert for
// expiration. As soon as more than half the time a cert is valid has passed,
// it will try to renew it.
func (c *CAManager) intermediateCertRenewalWatch(ctx context.Context) error {
isPrimary := c.srv.config.Datacenter == c.srv.config.PrimaryDatacenter
for {
select {
case <-ctx.Done():
return nil
case <-time.After(structs.IntermediateCertRenewInterval):
retryLoopBackoffAbortOnSuccess(ctx, func() error {
if !isPrimary {
c.logger.Info("starting check for intermediate renewal")
}
// Grab the 'lock' right away so the provider/config can't be changed out while we check
// the intermediate.
if err := c.setState(CAStateRenewIntermediate); err != nil {
return err
}
defer c.setReady()
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// If this isn't the primary, make sure the CA has been initialized.
if !isPrimary && !c.configuredSecondaryCA() {
return fmt.Errorf("secondary CA is not yet configured.")
}
state := c.srv.fsm.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
// If this is the primary, check if this is a provider that uses an intermediate cert. If
// it isn't, we don't need to check for a renewal.
if isPrimary {
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok {
return nil
}
} else {
c.logger.Info("Checking for intermediate renewal")
}
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
if activeIntermediate == "" {
return fmt.Errorf("datacenter doesn't have an active intermediate.")
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer),
intermediateCert.NotAfter) {
return nil
}
// Enough time has passed, go ahead with getting a new intermediate.
renewalFunc := c.getIntermediateCAPrimary
if !isPrimary {
renewalFunc = c.getIntermediateCASigned
}
if err := renewalFunc(provider, activeRoot); err != nil {
return err
}
if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil {
return err
}
c.setCAProvider(provider, activeRoot)
return nil
}, func(err error) {
c.logger.Error("error renewing intermediate certs",
"routine", intermediateCertRenewWatchRoutineName,
"error", err,
)
})
}
}
}
// secondaryCARootWatch maintains a blocking query to the primary datacenter's
// ConnectCA.Roots endpoint to monitor when it needs to request a new signed
// intermediate certificate.
func (c *CAManager) secondaryCARootWatch(ctx context.Context) error {
args := structs.DCSpecificRequest{
Datacenter: c.srv.config.PrimaryDatacenter,
QueryOptions: structs.QueryOptions{
// the maximum time the primary roots watch query can block before returning
MaxQueryTime: c.srv.config.MaxQueryTime,
},
}
c.logger.Debug("starting Connect CA root replication from primary datacenter", "primary", c.srv.config.PrimaryDatacenter)
retryLoopBackoff(ctx, func() error {
var roots structs.IndexedCARoots
if err := c.srv.forwardDC("ConnectCA.Roots", c.srv.config.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
}
// Update the state first to claim the 'lock'.
if err := c.setState(CAStateReconfig); err != nil {
return err
}
defer c.setReady()
// Update the cached primary roots now that the lock is held.
c.primaryRoots = roots
// Check to see if the primary has been upgraded in case we're waiting to switch to
// secondary mode.
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
if !c.configuredSecondaryCA() {
versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.srv, c.srv.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !primaryFound {
return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization")
}
if versionOk {
if err := c.initializeSecondaryProvider(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize secondary CA provider: %v", err)
}
}
}
// Run the secondary CA init routine to see if we need to request a new
// intermediate.
if c.configuredSecondaryCA() {
if err := c.initializeSecondaryCA(provider, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
}
args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index)
return nil
}, func(err error) {
c.logger.Error("CA root replication failed, will retry",
"routine", secondaryCARootWatchRoutineName,
"error", err,
)
})
return nil
}
// initializeSecondaryProvider configures the given provider for a secondary, non-root datacenter.
// It is being called while holding the stateLock in order to update actingSecondaryCA, which means
// it must never take that lock itself or call anything that does.
func (c *CAManager) initializeSecondaryProvider(provider ca.Provider, roots structs.IndexedCARoots) error {
if roots.TrustDomain == "" {
return fmt.Errorf("trust domain from primary datacenter is not initialized")
}
clusterID := strings.Split(roots.TrustDomain, ".")[0]
_, conf, err := c.srv.fsm.State().CAConfig(nil)
if err != nil {
return err
}
pCfg := ca.ProviderConfig{
ClusterID: clusterID,
Datacenter: c.srv.config.Datacenter,
IsPrimary: false,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
c.actingSecondaryCA = true
return nil
}
// configuredSecondaryCA returns true if we have been initialized as a secondary datacenter's CA.
func (c *CAManager) configuredSecondaryCA() bool {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
return c.actingSecondaryCA
}

View File

@ -80,11 +80,11 @@ func TestLeader_SecondaryCA_Initialize(t *testing.T) {
s2.tokens.UpdateAgentToken(masterToken, token.TokenSourceConfig) s2.tokens.UpdateAgentToken(masterToken, token.TokenSourceConfig)
s2.tokens.UpdateReplicationToken(masterToken, token.TokenSourceConfig) s2.tokens.UpdateReplicationToken(masterToken, token.TokenSourceConfig)
testrpc.WaitForLeader(t, s2.RPC, "secondary")
// Create the WAN link // Create the WAN link
joinWAN(t, s2, s1) joinWAN(t, s2, s1)
testrpc.WaitForLeader(t, s2.RPC, "secondary")
waitForNewACLs(t, s1) waitForNewACLs(t, s1)
waitForNewACLs(t, s2) waitForNewACLs(t, s2)
@ -175,9 +175,7 @@ func waitForActiveCARoot(t *testing.T, srv *Server, expect *structs.CARoot) {
} }
func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) { func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) {
s.caProviderReconfigurationLock.Lock() return s.caManager.getCAProvider()
defer s.caProviderReconfigurationLock.Unlock()
return s.getCAProvider()
} }
func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) {

View File

@ -30,7 +30,6 @@ import (
"google.golang.org/grpc" "google.golang.org/grpc"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
ca "github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth" "github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
"github.com/hashicorp/consul/agent/consul/fsm" "github.com/hashicorp/consul/agent/consul/fsm"
@ -137,17 +136,11 @@ type Server struct {
// autopilot is the Autopilot instance for this server. // autopilot is the Autopilot instance for this server.
autopilot *autopilot.Autopilot autopilot *autopilot.Autopilot
// caProviderReconfigurationLock guards the provider reconfiguration. // autopilotWaitGroup is used to block until Autopilot shuts down.
caProviderReconfigurationLock sync.Mutex autopilotWaitGroup sync.WaitGroup
// caProvider is the current CA provider in use for Connect. This is
// only non-nil when we are the leader. // caManager is used to synchronize CA operations across the leader and RPC functions.
caProvider ca.Provider caManager *CAManager
// caProviderRoot is the CARoot that was stored along with the ca.Provider
// active. It's only updated in lock-step with the caProvider. This prevents
// races between state updates to active roots and the fetch of the provider
// instance.
caProviderRoot *structs.CARoot
caProviderLock sync.RWMutex
// rate limiter to use when signing leaf certificates // rate limiter to use when signing leaf certificates
caLeafLimiter connectSignRateLimiter caLeafLimiter connectSignRateLimiter
@ -298,10 +291,6 @@ type Server struct {
shutdownCh chan struct{} shutdownCh chan struct{}
shutdownLock sync.Mutex shutdownLock sync.Mutex
// State for whether this datacenter is acting as a secondary CA.
actingSecondaryCA bool
actingSecondaryLock sync.RWMutex
// dcSupportsIntentionsAsConfigEntries is used to determine whether we can // dcSupportsIntentionsAsConfigEntries is used to determine whether we can
// migrate old intentions into service-intentions config entries. All // migrate old intentions into service-intentions config entries. All
// servers in the local DC must be on a version of Consul supporting // servers in the local DC must be on a version of Consul supporting
@ -480,6 +469,7 @@ func NewServer(config *Config, flat Deps) (*Server, error) {
return nil, fmt.Errorf("Failed to start Raft: %v", err) return nil, fmt.Errorf("Failed to start Raft: %v", err)
} }
s.caManager = NewCAManager(s)
if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) { if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) {
go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh}) go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh})
} }

View File

@ -132,7 +132,7 @@ func (s *Server) getCARoots(ws memdb.WatchSet, state *state.Store) (*structs.Ind
} }
func (s *Server) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) { func (s *Server) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) {
provider, caRoot := s.getCAProvider() provider, caRoot := s.caManager.getCAProvider()
if provider == nil { if provider == nil {
return nil, fmt.Errorf("internal error: CA provider is nil") return nil, fmt.Errorf("internal error: CA provider is nil")
} else if caRoot == nil { } else if caRoot == nil {

View File

@ -1483,7 +1483,7 @@ func TestServer_CALogging(t *testing.T) {
defer s1.Shutdown() defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1") testrpc.WaitForLeader(t, s1.RPC, "dc1")
if _, ok := s1.caProvider.(ca.NeedsLogger); !ok { if _, ok := s1.caManager.provider.(ca.NeedsLogger); !ok {
t.Fatalf("provider does not implement NeedsLogger") t.Fatalf("provider does not implement NeedsLogger")
} }