open-consul/agent/consul/leader_connect_ca.go

1698 lines
56 KiB
Go
Raw Normal View History

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package consul
import (
"context"
"crypto/x509"
"errors"
"fmt"
"net/url"
"reflect"
"strings"
"sync"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-uuid"
"golang.org/x/time/rate"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/routine"
"github.com/hashicorp/consul/lib/semaphore"
)
type caState string
const (
caStateUninitialized caState = "UNINITIALIZED"
caStateInitializing caState = "INITIALIZING"
caStateInitialized caState = "INITIALIZED"
caStateRenewIntermediate caState = "RENEWING"
caStateReconfig caState = "RECONFIGURING"
)
// caServerDelegate is an interface for server operations for facilitating
// easier testing.
type caServerDelegate interface {
ca.ConsulProviderStateDelegate
State() *state.Store
IsLeader() bool
ApplyCALeafRequest() (uint64, error)
forwardDC(method, dc string, args interface{}, reply interface{}) error
generateCASignRequest(csr string) *structs.CASignRequest
ServersSupportMultiDCConnectCA() error
}
// CAManager is a wrapper around CA operations such as updating roots, an intermediate
// or the configuration. All operations should go through the CAManager in order to
// avoid data races.
type CAManager struct {
delegate caServerDelegate
serverConf *Config
logger hclog.Logger
// rate limiter to use when signing leaf certificates
caLeafLimiter connectSignRateLimiter
providerLock sync.RWMutex
// provider is the current CA provider in use for Connect. This is
// only non-nil when we are the leader.
provider ca.Provider
// providerRoot is the CARoot that was stored along with the ca.Provider
// active. It's only updated in lock-step with the provider. This prevents
// races between state updates to active roots and the fetch of the provider
// instance.
providerRoot *structs.CARoot
// stateLock protects the internal state used for administrative CA tasks.
stateLock sync.Mutex
state caState
primaryRoots structs.IndexedCARoots // The most recently seen state of the root CAs from the primary datacenter.
leaderRoutineManager *routine.Manager
// providerShim is used to test CAManager with a fake provider.
providerShim ca.Provider
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
// shim time.Now for testing
timeNow func() time.Time
}
type caDelegateWithState struct {
*Server
}
func (c *caDelegateWithState) State() *state.Store {
return c.fsm.State()
}
func (c *caDelegateWithState) ApplyCARequest(req *structs.CARequest) (interface{}, error) {
return c.Server.raftApplyMsgpack(structs.ConnectCARequestType, req)
}
func (c *caDelegateWithState) ApplyCALeafRequest() (uint64, error) {
// TODO(banks): when we implement IssuedCerts table we can use the insert to
// that as the raft index to return in response.
//
// UPDATE(mkeeler): The original implementation relied on updating the CAConfig
// and using its index as the ModifyIndex for certs. This was buggy. The long
// term goal is still to insert some metadata into raft about the certificates
// and use that raft index for the ModifyIndex. This is a partial step in that
// direction except that we only are setting an index and not storing the
// metadata.
req := structs.CALeafRequest{
Op: structs.CALeafOpIncrementIndex,
Datacenter: c.Server.config.Datacenter,
}
resp, err := c.Server.raftApplyMsgpack(structs.ConnectCALeafRequestType|structs.IgnoreUnknownTypeFlag, &req)
2021-07-12 14:37:25 +00:00
if err != nil {
return 0, err
}
modIdx, ok := resp.(uint64)
if !ok {
return 0, fmt.Errorf("Invalid response from updating the leaf cert index")
}
return modIdx, err
}
func (c *caDelegateWithState) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: c.Server.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: c.Server.tokens.ReplicationToken()},
}
}
func (c *caDelegateWithState) ServersSupportMultiDCConnectCA() error {
versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.Server, c.Server.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !primaryFound {
return fmt.Errorf("primary datacenter is unreachable")
}
if !versionOk {
return fmt.Errorf("all servers in the primary datacenter are not at the minimum version %v", minMultiDCConnectVersion)
}
return nil
}
func (c *caDelegateWithState) ProviderState(id string) (*structs.CAConsulProviderState, error) {
_, s, err := c.fsm.State().CAProviderState(id)
return s, err
}
func NewCAManager(delegate caServerDelegate, leaderRoutineManager *routine.Manager, logger hclog.Logger, config *Config) *CAManager {
return &CAManager{
delegate: delegate,
logger: logger,
serverConf: config,
state: caStateUninitialized,
leaderRoutineManager: leaderRoutineManager,
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
timeNow: time.Now,
}
}
// setState attempts to update the CA state to the given state.
// Valid state transitions are:
//
// caStateInitialized -> <any state except caStateInitializing>
// caStateUninitialized -> caStateInitializing
// caStateUninitialized -> caStateReconfig
//
// Other state transitions may be forced if the validateState parameter is set to false.
// This will mainly be used in deferred functions which aim to set the final status based
// a successful/error return.
func (c *CAManager) setState(newState caState, validateState bool) (caState, error) {
c.stateLock.Lock()
defer c.stateLock.Unlock()
state := c.state
if !validateState ||
(state == caStateInitialized && newState != caStateInitializing) ||
(state == caStateUninitialized && newState == caStateInitializing) ||
(state == caStateUninitialized && newState == caStateReconfig) {
c.state = newState
} else {
return state, &caStateError{Current: state}
}
return state, nil
}
type caStateError struct {
Current caState
}
func (e *caStateError) Error() string {
return fmt.Sprintf("CA is already in state %q", e.Current)
}
// secondarySetPrimaryRoots updates the most recently seen roots from the primary.
func (c *CAManager) secondarySetPrimaryRoots(newRoots structs.IndexedCARoots) {
// TODO: this could be a different lock, as long as its the same lock in secondaryGetPrimaryRoots
c.stateLock.Lock()
defer c.stateLock.Unlock()
c.primaryRoots = newRoots
}
func (c *CAManager) secondaryGetActivePrimaryCARoot() (*structs.CARoot, error) {
// TODO: this could be a different lock, as long as its the same lock in secondarySetPrimaryRoots
c.stateLock.Lock()
primaryRoots := c.primaryRoots
c.stateLock.Unlock()
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
return root, nil
}
}
return nil, fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
// initializeCAConfig is used to initialize the CA config if necessary
// when setting up the CA during establishLeadership. The state should be set to
// non-ready before calling this.
func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) {
st := c.delegate.State()
_, config, err := st.CAConfig(nil)
if err != nil {
return nil, err
}
if config == nil {
config = c.serverConf.CAConfig
if c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter && config.ClusterID == "" {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
config.ClusterID = id
}
} else if _, ok := config.Config["IntermediateCertTTL"]; !ok {
dup := *config
copied := make(map[string]interface{})
for k, v := range dup.Config {
copied[k] = v
}
copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String()
dup.Config = copied
config = &dup
} else {
return config, nil
}
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: config,
}
if _, err := c.delegate.ApplyCARequest(&req); err != nil {
return nil, err
}
return config, nil
}
// newCARoot returns a filled-in structs.CARoot from a raw PEM value.
func newCARoot(pemValue, provider, clusterID string) (*structs.CARoot, error) {
primaryCert, err := connect.ParseCert(pemValue)
if err != nil {
return nil, err
}
keyType, keyBits, err := connect.KeyInfoFromCert(primaryCert)
if err != nil {
return nil, fmt.Errorf("error extracting root key info: %v", err)
}
return &structs.CARoot{
ID: connect.CalculateCertFingerprint(primaryCert.Raw),
2022-04-14 20:55:10 +00:00
Name: fmt.Sprintf("%s CA Primary Cert", providerPrettyName(provider)),
SerialNumber: primaryCert.SerialNumber.Uint64(),
SigningKeyID: connect.EncodeSigningKeyID(primaryCert.SubjectKeyId),
ExternalTrustDomain: clusterID,
NotBefore: primaryCert.NotBefore,
NotAfter: primaryCert.NotAfter,
RootCert: lib.EnsureTrailingNewline(pemValue),
PrivateKeyType: keyType,
PrivateKeyBits: keyBits,
Active: true,
}, nil
}
// getCAProvider returns the currently active instance of the CA Provider,
// as well as the active root.
func (c *CAManager) getCAProvider() (ca.Provider, *structs.CARoot) {
retries := 0
for {
c.providerLock.RLock()
result := c.provider
resultRoot := c.providerRoot
c.providerLock.RUnlock()
// In cases where an agent is started with managed proxies, we may ask
// for the provider before establishLeadership completes. If we're the
// leader, then wait and get the provider again
if result == nil && c.delegate.IsLeader() && retries < 10 {
retries++
time.Sleep(50 * time.Millisecond)
continue
}
return result, resultRoot
}
}
// setCAProvider is being called while holding the stateLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
c.providerLock.Lock()
c.provider = newProvider
c.providerRoot = root
c.providerLock.Unlock()
}
func (c *CAManager) Start(ctx context.Context) {
// Attempt to initialize the Connect CA now. This will
// happen during leader establishment and it would be great
// if the CA was ready to go once that process was finished.
if err := c.Initialize(); err != nil {
c.logger.Error("Failed to initialize Connect CA", "error", err)
// we failed to fully initialize the CA so we need to spawn a
// go routine to retry this process until it succeeds or we lose
// leadership and the go routine gets stopped.
c.leaderRoutineManager.Start(ctx, backgroundCAInitializationRoutineName, c.backgroundCAInitialization)
} else {
// We only start these if CA initialization was successful. If not the completion of the
// background CA initialization will start these routines.
c.startPostInitializeRoutines(ctx)
}
}
func (c *CAManager) Stop() {
c.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
c.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
c.leaderRoutineManager.Stop(backgroundCAInitializationRoutineName)
if provider, _ := c.getCAProvider(); provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
}
}
c.setState(caStateUninitialized, false)
c.primaryRoots = structs.IndexedCARoots{}
c.setCAProvider(nil, nil)
}
func (c *CAManager) startPostInitializeRoutines(ctx context.Context) {
// Start the Connect secondary DC actions if enabled.
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
c.leaderRoutineManager.Start(ctx, secondaryCARootWatchRoutineName, c.secondaryCARootWatch)
}
c.leaderRoutineManager.Start(ctx, intermediateCertRenewWatchRoutineName, c.runRenewIntermediate)
}
func (c *CAManager) backgroundCAInitialization(ctx context.Context) error {
retryLoopBackoffAbortOnSuccess(ctx, c.Initialize, func(err error) {
c.logger.Error("Failed to initialize Connect CA",
"routine", backgroundCAInitializationRoutineName,
"error", err,
)
})
if err := ctx.Err(); err != nil {
return err
}
c.logger.Info("Successfully initialized the Connect CA")
c.startPostInitializeRoutines(ctx)
return nil
}
// Initialize sets up the CA provider when gaining leadership, either bootstrapping
// the CA if this is the primary DC or making a remote RPC for intermediate signing
// if this is a secondary DC.
func (c *CAManager) Initialize() (reterr error) {
// Bail if connect isn't enabled.
if !c.serverConf.ConnectEnabled {
return nil
}
// Update the state before doing anything else.
_, err := c.setState(caStateInitializing, true)
var errCaState *caStateError
switch {
case errors.As(err, &errCaState) && errCaState.Current == caStateInitialized:
return nil
case err != nil:
return err
}
defer func() {
// Using named return values in deferred funcs isnt too common in our code
// but it is first class Go functionality. The error erturned from the
// main func will be available by its given name within deferred functions.
// See: https://blog.golang.org/defer-panic-and-recover
if reterr == nil {
c.setState(caStateInitialized, false)
} else {
c.setState(caStateUninitialized, false)
}
}()
// Initialize the provider based on the current config.
conf, err := c.initializeCAConfig()
if err != nil {
return err
}
provider, err := c.newProvider(conf)
if err != nil {
return err
}
c.setCAProvider(provider, nil)
if c.serverConf.PrimaryDatacenter == c.serverConf.Datacenter {
return c.primaryInitialize(provider, conf)
}
return c.secondaryInitialize(provider, conf)
}
func (c *CAManager) secondaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error {
if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil {
return fmt.Errorf("initialization will be deferred: %w", err)
}
// Get the root CA to see if we need to refresh our intermediate.
args := structs.DCSpecificRequest{
Datacenter: c.serverConf.PrimaryDatacenter,
}
var roots structs.IndexedCARoots
if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("failed to get CA roots from primary DC: %w", err)
}
c.secondarySetPrimaryRoots(roots)
// Configure the CA provider and initialize the intermediate certificate if necessary.
if err := c.secondaryInitializeProvider(provider, roots); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return err
}
c.logger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// createProvider returns a connect CA provider from the given config.
func (c *CAManager) newProvider(conf *structs.CAConfiguration) (ca.Provider, error) {
logger := c.logger.Named(conf.Provider)
switch conf.Provider {
case structs.ConsulCAProvider:
return ca.NewConsulProvider(c.delegate, logger), nil
case structs.VaultCAProvider:
return ca.NewVaultProvider(logger), nil
case structs.AWSCAProvider:
return ca.NewAWSProvider(logger), nil
default:
if c.providerShim != nil {
return c.providerShim, nil
}
return nil, fmt.Errorf("unknown CA provider %q", conf.Provider)
}
}
// primaryInitialize runs the initialization logic for a root CA. It should only
// be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) primaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error {
pCfg := ca.ProviderConfig{
ClusterID: conf.ClusterID,
Datacenter: c.serverConf.Datacenter,
IsPrimary: true,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
root, err := provider.GenerateRoot()
if err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
rootCA, err := newCARoot(root.PEM, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
// TODO: https://github.com/hashicorp/consul/issues/12386
interPEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating intermediate cert: %v", err)
}
intermediateCert, err := connect.ParseCert(interPEM)
if err != nil {
return fmt.Errorf("error getting intermediate cert: %v", err)
}
// If the provider has state to persist and it's changed or new then update
// CAConfig.
pState, err := provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
if !reflect.DeepEqual(conf.State, pState) {
// Update the CAConfig in raft to persist the provider state
conf.State = pState
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: conf,
}
if _, err = c.delegate.ApplyCARequest(&req); err != nil {
return fmt.Errorf("error persisting provider state: %v", err)
}
}
var rootUpdateRequired bool
// Versions prior to 1.9.3, 1.8.8, and 1.7.12 incorrectly used the primary
// rootCA's subjectKeyID here instead of the intermediate. For
// provider=consul this didn't matter since there are no intermediates in
// the primaryDC, but for vault it does matter.
expectedSigningKeyID := connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
if rootCA.SigningKeyID != expectedSigningKeyID {
c.logger.Info("Correcting stored CARoot values",
"previous-signing-key", rootCA.SigningKeyID, "updated-signing-key", expectedSigningKeyID)
rootCA.SigningKeyID = expectedSigningKeyID
rootUpdateRequired = true
}
// Add the local leaf signing cert to the rootCA struct. This handles both
// upgrades of existing state, and new rootCA.
if c.getLeafSigningCertFromRoot(rootCA) != interPEM {
rootCA.IntermediateCerts = append(rootCA.IntermediateCerts, interPEM)
rootUpdateRequired = true
}
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := c.delegate.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot != nil && !rootUpdateRequired {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}
// TODO: why doesn't this c.setCAProvider(provider, activeRoot) ?
rootCA.IntermediateCerts = activeRoot.IntermediateCerts
c.setCAProvider(provider, rootCA)
c.logger.Info("initialized primary datacenter CA from existing CARoot with provider", "provider", conf.Provider)
return nil
}
if err := c.persistNewRootAndConfig(provider, rootCA, conf); err != nil {
return err
}
c.setCAProvider(provider, rootCA)
c.logger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// getLeafSigningCertFromRoot returns the PEM encoded certificate that should be used to
// sign leaf certificates in the local datacenter. The SubjectKeyId of the
// returned cert should always match the SigningKeyID of the CARoot.
//
// TODO: fix the data model so that we don't need this complicated lookup to
// find the leaf signing cert. See github.com/hashicorp/consul/issues/11347.
func (c *CAManager) getLeafSigningCertFromRoot(root *structs.CARoot) string {
if !c.isIntermediateUsedToSignLeaf() {
return root.RootCert
}
if len(root.IntermediateCerts) == 0 {
return ""
}
return root.IntermediateCerts[len(root.IntermediateCerts)-1]
}
// secondaryInitializeIntermediateCA generates a Certificate Signing Request (CSR)
// for the intermediate CA that is used to sign leaf certificates in the secondary.
// The CSR is signed by the primary DC and then persisted in the state store.
//
// This method should only be called while the state lock is held by setting the
// state to non-ready.
func (c *CAManager) secondaryInitializeIntermediateCA(provider ca.Provider, config *structs.CAConfiguration) error {
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
_, activeRoot, err := c.delegate.State().CARootActive(nil)
if err != nil {
return err
}
var currentSigningKeyID string
if activeRoot != nil {
currentSigningKeyID = activeRoot.SigningKeyID
}
var expectedSigningKeyID string
if activeIntermediate != "" {
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
}
newActiveRoot, err := c.secondaryGetActivePrimaryCARoot()
if err != nil {
return err
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := activeIntermediate == ""
if activeRoot != nil && newActiveRoot.ID != activeRoot.ID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
if needsNewIntermediate {
if err := c.secondaryRequestNewSigningCert(provider, newActiveRoot); err != nil {
return err
}
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeRoot
}
// Determine whether a root update is needed, and persist the roots/config accordingly.
var newRoot *structs.CARoot
if activeRoot == nil || needsNewIntermediate {
newRoot = newActiveRoot
}
if err := c.persistNewRootAndConfig(provider, newRoot, config); err != nil {
return err
}
c.setCAProvider(provider, newActiveRoot)
return nil
}
// persistNewRootAndConfig should only be called while the state lock is held
// by setting the state to non-ready.
// If newActiveRoot is non-nil, it will be appended to the current roots list.
// If config is non-nil, it will be used to overwrite the existing config.
func (c *CAManager) persistNewRootAndConfig(provider ca.Provider, newActiveRoot *structs.CARoot, config *structs.CAConfiguration) error {
state := c.delegate.State()
idx, oldRoots, err := state.CARoots(nil)
if err != nil {
return err
}
// Look up the existing CA config if a new one wasn't provided.
var newConf structs.CAConfiguration
_, storedConfig, err := state.CAConfig(nil)
if err != nil {
return err
}
if storedConfig == nil {
return fmt.Errorf("local CA not initialized yet")
}
// Exit early if the change is a no-op.
if !shouldPersistNewRootAndConfig(newActiveRoot, storedConfig, config) {
return nil
}
if config != nil {
newConf = *config
} else {
newConf = *storedConfig
}
// Update the trust domain for the config if there's a new root, or keep the old
// one if the root isn't being updated.
newConf.ModifyIndex = storedConfig.ModifyIndex
if newActiveRoot != nil {
newConf.ClusterID = newActiveRoot.ExternalTrustDomain
} else {
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
newConf.ClusterID = activeRoot.ExternalTrustDomain
}
// Persist any state the provider needs us to
newConf.State, err = provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
// If there's a new active root, copy the root list and append it, updating
// the old root with the time it was rotated out.
var newRoots structs.CARoots
for _, r := range oldRoots {
newRoot := *r
if newRoot.Active && newActiveRoot != nil {
newRoot.Active = false
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
newRoot.RotatedOutAt = c.timeNow()
}
if newRoot.ExternalTrustDomain == "" {
newRoot.ExternalTrustDomain = newConf.ClusterID
}
newRoots = append(newRoots, &newRoot)
}
if newActiveRoot != nil {
newRoots = append(newRoots, newActiveRoot)
}
args := &structs.CARequest{
Op: structs.CAOpSetRootsAndConfig,
Index: idx,
Roots: newRoots,
Config: &newConf,
}
resp, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
c.logger.Info("updated root certificates from primary datacenter")
return nil
}
func shouldPersistNewRootAndConfig(newActiveRoot *structs.CARoot, oldConfig, newConfig *structs.CAConfiguration) bool {
if newActiveRoot != nil {
return true
}
if newConfig == nil {
return false
}
return newConfig.Provider == oldConfig.Provider && reflect.DeepEqual(newConfig.Config, oldConfig.Config)
}
func (c *CAManager) UpdateConfiguration(args *structs.CARequest) (reterr error) {
// Attempt to update the state first.
oldState, err := c.setState(caStateReconfig, true)
if err != nil {
return err
}
defer func() {
// Using named return values in deferred funcs isnt too common in our code
// but it is first class Go functionality. The error erturned from the
// main func will be available by its given name within deferred functions.
// See: https://blog.golang.org/defer-panic-and-recover
if reterr == nil {
c.setState(caStateInitialized, false)
} else {
c.setState(oldState, false)
}
}()
// Attempt to initialize the config if we failed to do so in Initialize for some reason
prevConfig, err := c.initializeCAConfig()
if err != nil {
return err
}
// Exit early if it's a no-op change
state := c.delegate.State()
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
// Don't allow state changes. Either it needs to be empty or the same to allow
// read-modify-write loops that don't touch the State field.
if len(args.Config.State) > 0 &&
!reflect.DeepEqual(args.Config.State, config.State) {
return ErrStateReadOnly
}
// Don't allow users to change the ClusterID.
args.Config.ClusterID = config.ClusterID
if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) {
return nil
}
// If the provider hasn't changed, we need to load the current Provider state
// so it can decide if it needs to change resources or not based on the config
// change.
if args.Config.Provider == config.Provider {
// Note this is a shallow copy since the State method doc requires the
// provider return a map that will not be further modified and should not
// modify the one we pass to Configure.
args.Config.State = config.State
}
// Create a new instance of the provider described by the config
// and get the current active root CA. This acts as a good validation
// of the config and makes sure the provider is functioning correctly
// before we commit any changes to Raft.
newProvider, err := c.newProvider(args.Config)
if err != nil {
return fmt.Errorf("could not initialize provider: %v", err)
}
pCfg := ca.ProviderConfig{
ClusterID: args.Config.ClusterID,
Datacenter: c.serverConf.Datacenter,
// This endpoint can be called in a secondary DC too so set this correctly.
IsPrimary: c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter,
RawConfig: args.Config.Config,
State: args.Config.State,
}
if args.Config.Provider == config.Provider {
if validator, ok := newProvider.(ValidateConfigUpdater); ok {
if err := validator.ValidateConfigUpdate(prevConfig.Config, args.Config.Config); err != nil {
return fmt.Errorf("new configuration is incompatible with previous configuration: %w", err)
}
}
}
if err := newProvider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
cleanupNewProvider := func() {
if err := newProvider.Cleanup(args.Config.Provider != config.Provider, args.Config.Config); err != nil {
c.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err)
}
}
// If this is a secondary, just check if the intermediate needs to be regenerated.
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
if err := c.secondaryInitializeIntermediateCA(newProvider, args.Config); err != nil {
cleanupNewProvider()
return fmt.Errorf("Error updating secondary datacenter CA config: %v", err)
}
c.logger.Info("Secondary CA provider config updated")
return nil
}
if err := c.primaryUpdateRootCA(newProvider, args, config); err != nil {
cleanupNewProvider()
return err
}
return nil
}
// ValidateConfigUpdater is an optional interface that may be implemented
// by a ca.Provider. If the provider implements this interface, the
// ValidateConfigurationUpdate will be called when a user attempts to change the
// CA configuration, and the provider type has not changed from the previous
// configuration.
type ValidateConfigUpdater interface {
// ValidateConfigUpdate should return an error if the next configuration is
// incompatible with the previous configuration.
//
// TODO: use better types after https://github.com/hashicorp/consul/issues/12238
ValidateConfigUpdate(previous, next map[string]interface{}) error
}
func (c *CAManager) primaryUpdateRootCA(newProvider ca.Provider, args *structs.CARequest, config *structs.CAConfiguration) error {
providerRoot, err := newProvider.GenerateRoot()
if err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
newRootPEM := providerRoot.PEM
newActiveRoot, err := newCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID)
if err != nil {
return err
}
// TODO: https://github.com/hashicorp/consul/issues/12386
intermediate, err := newProvider.ActiveIntermediate()
if err != nil {
return fmt.Errorf("error fetching active intermediate: %w", err)
}
if intermediate == "" {
intermediate, err = newProvider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating intermediate: %w", err)
}
}
if intermediate != newRootPEM {
if err := setLeafSigningCert(newActiveRoot, intermediate); err != nil {
return err
}
}
// See if the provider needs to persist any state along with the config
pState, err := newProvider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
args.Config.State = pState
state := c.delegate.State()
// Compare the new provider's root CA ID to the current one. If they
// match, just update the existing provider with the new config.
// If they don't match, begin the root rotation process.
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}
// If the root didn't change, just update the config and return.
if root != nil && root.ID == newActiveRoot.ID {
args.Op = structs.CAOpSetConfig
_, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
// If the config has been committed, update the local provider instance
c.setCAProvider(newProvider, newActiveRoot)
c.logger.Info("CA provider config updated")
return nil
}
// get the old CA provider to be used for Cross Signing and to clean it up at the end
// of the functi8on.
oldProvider, _ := c.getCAProvider()
if oldProvider == nil {
return fmt.Errorf("internal error: CA provider is nil")
}
// We only even think about cross signing if the current provider has a root cert
// In some cases such as having a bad CA configuration during startup the provider
// may not have been able to generate a cert. We then want to be able to prevent
// an attempt to cross sign the cert which will definitely fail.
if root != nil {
// If it's a config change that would trigger a rotation (different provider/root):
// 1. Get the root from the new provider.
// 2. Call CrossSignCA on the old provider to sign the new root with the old one to
// get a cross-signed certificate.
// 3. Take the active root for the new provider and append the intermediate from step 2
// to its list of intermediates.
// TODO: this cert is already parsed once in newCARoot, could we remove the second parse?
newRoot, err := connect.ParseCert(newRootPEM)
if err != nil {
return err
}
// At this point, we know the config change has triggered a root rotation,
// either by swapping the provider type or changing the provider's config
// to use a different root certificate.
// First up, check that the current provider actually supports
// cross-signing.
canXSign, err := oldProvider.SupportsCrossSigning()
if err != nil {
return fmt.Errorf("CA provider error: %s", err)
}
if !canXSign && !args.Config.ForceWithoutCrossSigning {
return errors.New("The current CA Provider does not support cross-signing. " +
"You can try again with ForceWithoutCrossSigningSet but this may cause " +
"disruption - see documentation for more.")
}
if args.Config.ForceWithoutCrossSigning {
c.logger.Warn("ForceWithoutCrossSigning set, CA reconfiguration skipping cross-signing")
}
// If ForceWithoutCrossSigning wasn't set, attempt to have the old CA generate a
// cross-signed intermediate.
if canXSign && !args.Config.ForceWithoutCrossSigning {
// Have the old provider cross-sign the new root
xcCert, err := oldProvider.CrossSignCA(newRoot)
if err != nil {
return err
}
// Add the cross signed cert to the new CA's intermediates (to be attached
// to leaf certs). We do not want it to be the last cert if there are any
// existing intermediate certs so we push to the front.
newActiveRoot.IntermediateCerts = append([]string{xcCert}, newActiveRoot.IntermediateCerts...)
}
}
// Update the roots and CA config in the state store at the same time
idx, roots, err := state.CARoots(nil)
if err != nil {
return err
}
var newRoots structs.CARoots
for _, r := range roots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
newRoot.RotatedOutAt = c.timeNow()
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
args.Op = structs.CAOpSetRootsAndConfig
args.Index = idx
args.Config.ModifyIndex = config.ModifyIndex
args.Roots = newRoots
resp, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
// If the config has been committed, update the local provider instance
// and call teardown on the old provider
c.setCAProvider(newProvider, newActiveRoot)
if err := oldProvider.Cleanup(args.Config.Provider != config.Provider, args.Config.Config); err != nil {
c.logger.Warn("failed to clean up old provider", "provider", config.Provider, "error", err)
}
c.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider)
return nil
}
// primaryRenewIntermediate regenerates the intermediate cert in the primary datacenter.
// This is only run for CAs that require an intermediary in the primary DC, such as Vault.
// It should only be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) primaryRenewIntermediate(provider ca.Provider, newActiveRoot *structs.CARoot) error {
// Generate and sign an intermediate cert using the root CA.
intermediatePEM, err := provider.GenerateIntermediate()
if err != nil {
return fmt.Errorf("error generating new intermediate cert: %v", err)
}
if err := setLeafSigningCert(newActiveRoot, intermediatePEM); err != nil {
return err
}
c.logger.Info("generated new intermediate certificate for primary datacenter")
return nil
}
// secondaryRequestNewSigningCert creates a Certificate Signing Request, sends
// the request to the primary, and stores the received certificate in the
// provider.
// Should only be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) secondaryRequestNewSigningCert(provider ca.Provider, newActiveRoot *structs.CARoot) error {
csr, opaque, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
}
var intermediatePEM string
if err := c.delegate.forwardDC("ConnectCA.SignIntermediate", c.serverConf.PrimaryDatacenter, c.delegate.generateCASignRequest(csr), &intermediatePEM); err != nil {
// this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership
c.logger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err)
return nil
}
if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert, opaque); err != nil {
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
if err := setLeafSigningCert(newActiveRoot, intermediatePEM); err != nil {
return fmt.Errorf("Failed to set the leaf signing cert to the intermediate: %w", err)
}
c.logger.Info("received new intermediate certificate from primary datacenter")
return nil
}
// setLeafSigningCert updates the CARoot by appending the pem to the list of
// intermediate certificates, and setting the SigningKeyID to the encoded
// SubjectKeyId of the certificate.
func setLeafSigningCert(caRoot *structs.CARoot, pem string) error {
cert, err := connect.ParseCert(pem)
if err != nil {
return fmt.Errorf("error parsing leaf signing cert: %w", err)
}
if err := pruneExpiredIntermediates(caRoot); err != nil {
return err
}
caRoot.IntermediateCerts = append(caRoot.IntermediateCerts, pem)
caRoot.SigningKeyID = connect.EncodeSigningKeyID(cert.SubjectKeyId)
return nil
}
// pruneExpiredIntermediates removes expired intermediate certificates
// from the given CARoot.
func pruneExpiredIntermediates(caRoot *structs.CARoot) error {
var newIntermediates []string
now := time.Now()
for _, intermediatePEM := range caRoot.IntermediateCerts {
cert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing leaf signing cert: %w", err)
}
// Only keep the intermediate cert if it's still valid.
if cert.NotAfter.After(now) {
newIntermediates = append(newIntermediates, intermediatePEM)
}
}
caRoot.IntermediateCerts = newIntermediates
return nil
}
// runRenewIntermediate periodically attempts to renew the intermediate cert.
func (c *CAManager) runRenewIntermediate(ctx context.Context) error {
for {
select {
case <-ctx.Done():
return nil
case <-time.After(structs.IntermediateCertRenewInterval):
retryLoopBackoffAbortOnSuccess(ctx, func() error {
return c.RenewIntermediate(ctx)
}, func(err error) {
c.logger.Error("error renewing intermediate certs",
"routine", intermediateCertRenewWatchRoutineName,
"error", err,
)
})
}
}
}
// RenewIntermediate checks the intermediate cert for
// expiration. If more than half the time a cert is valid has passed,
// it will try to renew it.
func (c *CAManager) RenewIntermediate(ctx context.Context) error {
return c.renewIntermediate(ctx, false)
}
func (c *CAManager) renewIntermediateNow(ctx context.Context) error {
return c.renewIntermediate(ctx, true)
}
func (c *CAManager) renewIntermediate(ctx context.Context, forceNow bool) error {
// Grab the 'lock' right away so the provider/config can't be changed out while we check
// the intermediate.
if _, err := c.setState(caStateRenewIntermediate, true); err != nil {
return err
}
defer c.setState(caStateInitialized, false)
isPrimary := c.serverConf.InPrimaryDatacenter()
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// If this isn't the primary, make sure the CA has been initialized.
if !isPrimary && !c.secondaryHasProviderRoots() {
return fmt.Errorf("secondary CA is not yet configured.")
}
state := c.delegate.State()
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}
activeRoot := root.Clone()
// If this is the primary, check if this is a provider that uses an intermediate cert. If
// it isn't, we don't need to check for a renewal.
if isPrimary && !primaryUsesIntermediate(provider) {
return nil
}
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
if activeIntermediate == "" {
return fmt.Errorf("datacenter doesn't have an active intermediate.")
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
if !forceNow {
if lessThanHalfTimePassed(c.timeNow(), intermediateCert.NotBefore, intermediateCert.NotAfter) {
return nil
}
}
// Enough time has passed, go ahead with getting a new intermediate.
renewalFunc := c.primaryRenewIntermediate
if !isPrimary {
renewalFunc = c.secondaryRequestNewSigningCert
}
if forceNow {
err := renewalFunc(provider, activeRoot)
if err != nil {
return err
}
} else {
errCh := make(chan error, 1)
go func() {
errCh <- renewalFunc(provider, activeRoot)
}()
// Wait for the renewal func to return or for the context to be canceled.
select {
case <-ctx.Done():
return ctx.Err()
case err := <-errCh:
if err != nil {
return err
}
}
}
if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil {
return err
}
c.setCAProvider(provider, activeRoot)
return nil
}
// secondaryCARootWatch maintains a blocking query to the primary datacenter's
// ConnectCA.Roots endpoint to monitor when it needs to request a new signed
// intermediate certificate.
func (c *CAManager) secondaryCARootWatch(ctx context.Context) error {
args := structs.DCSpecificRequest{
Datacenter: c.serverConf.PrimaryDatacenter,
QueryOptions: structs.QueryOptions{
// the maximum time the primary roots watch query can block before returning
MaxQueryTime: c.serverConf.MaxQueryTime,
},
}
c.logger.Debug("starting Connect CA root replication from primary datacenter", "primary", c.serverConf.PrimaryDatacenter)
retryLoopBackoff(ctx, func() error {
var roots structs.IndexedCARoots
if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
}
// Return if the context has been canceled while waiting on the RPC.
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Attempt to update the roots using the returned data.
if err := c.secondaryUpdateRoots(roots); err != nil {
return err
}
args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index)
return nil
}, func(err error) {
c.logger.Error("CA root replication failed, will retry",
"routine", secondaryCARootWatchRoutineName,
"error", err,
)
})
return nil
}
// secondaryUpdateRoots updates the cached roots from the primary and regenerates the intermediate
// certificate if necessary.
func (c *CAManager) secondaryUpdateRoots(roots structs.IndexedCARoots) error {
// Update the state first to claim the 'lock'.
if _, err := c.setState(caStateReconfig, true); err != nil {
return err
}
defer c.setState(caStateInitialized, false)
// Update the cached primary roots now that the lock is held.
c.secondarySetPrimaryRoots(roots)
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// Run the secondary CA init routine to see if we need to request a new
// intermediate.
if c.secondaryHasProviderRoots() {
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
return nil
}
// Attempt to initialize now that we have updated roots. This is an optimization
// so that we don't have to wait for the Initialize retry backoff if we were
// waiting on roots from the primary to be able to complete initialization.
if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil {
return fmt.Errorf("failed to initialize while updating primary roots: %w", err)
}
if err := c.secondaryInitializeProvider(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize secondary CA provider: %v", err)
}
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
return nil
}
// secondaryInitializeProvider configures the given provider for a secondary, non-root datacenter.
func (c *CAManager) secondaryInitializeProvider(provider ca.Provider, roots structs.IndexedCARoots) error {
if roots.TrustDomain == "" {
return fmt.Errorf("trust domain from primary datacenter is not initialized")
}
clusterID := strings.Split(roots.TrustDomain, ".")[0]
_, conf, err := c.delegate.State().CAConfig(nil)
if err != nil {
return err
}
pCfg := ca.ProviderConfig{
ClusterID: clusterID,
Datacenter: c.serverConf.Datacenter,
IsPrimary: false,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
return nil
}
// secondaryHasProviderRoots returns true after providerRoot has been set. This
// method is used to detect when the secondary has received the roots from the
// primary DC.
func (c *CAManager) secondaryHasProviderRoots() bool {
// TODO: this could potentially also use primaryRoots instead of providerRoot
c.providerLock.Lock()
defer c.providerLock.Unlock()
return c.providerRoot != nil
}
type connectSignRateLimiter struct {
// csrRateLimiter limits the rate of signing new certs if configured. Lazily
// initialized from current config to support dynamic changes.
// csrRateLimiterMu must be held while dereferencing the pointer or storing a
// new one, but methods can be called on the limiter object outside of the
// locked section. This is done only in the getCSRRateLimiterWithLimit method.
csrRateLimiter *rate.Limiter
csrRateLimiterMu sync.RWMutex
// csrConcurrencyLimiter is a dynamically resizable semaphore used to limit
// Sign RPC concurrency if configured. The zero value is usable as soon as
// SetSize is called which we do dynamically in the RPC handler to avoid
// having to hook elaborate synchronization mechanisms through the CA config
// endpoint and config reload etc.
csrConcurrencyLimiter semaphore.Dynamic
}
// getCSRRateLimiterWithLimit returns a rate.Limiter with the desired limit set.
// It uses the shared server-wide limiter unless the limit has been changed in
// config or the limiter has not been setup yet in which case it just-in-time
// configures the new limiter. We assume that limit changes are relatively rare
// and that all callers (there is currently only one) use the same config value
// as the limit. There might be some flapping if there are multiple concurrent
// requests in flight at the time the config changes where A sees the new value
// and updates, B sees the old but then gets this lock second and changes back.
// Eventually though and very soon (once all current RPCs are complete) we are
// guaranteed to have the correct limit set by the next RPC that comes in so I
// assume this is fine. If we observe strange behavior because of it, we could
// add hysteresis that prevents changes too soon after a previous change but
// that seems unnecessary for now.
func (l *connectSignRateLimiter) getCSRRateLimiterWithLimit(limit rate.Limit) *rate.Limiter {
l.csrRateLimiterMu.RLock()
lim := l.csrRateLimiter
l.csrRateLimiterMu.RUnlock()
// If there is a current limiter with the same limit, return it. This should
// be the common case.
if lim != nil && lim.Limit() == limit {
return lim
}
// Need to change limiter, get write lock
l.csrRateLimiterMu.Lock()
defer l.csrRateLimiterMu.Unlock()
// No limiter yet, or limit changed in CA config, reconfigure a new limiter.
// We use burst of 1 for a hard limit. Note that either bursting or waiting is
// necessary to get expected behavior in fact of random arrival times, but we
// don't need both and we use Wait with a small delay to smooth noise. See
// https://github.com/banks/sim-rate-limit-backoff/blob/master/README.md.
l.csrRateLimiter = rate.NewLimiter(limit, 1)
return l.csrRateLimiter
}
// AuthorizeAndSignCertificate signs a leaf certificate for the service or agent
// identified by the SPIFFE ID in the given CSR's SAN. It performs authorization
// using the given acl.Authorizer.
func (c *CAManager) AuthorizeAndSignCertificate(csr *x509.CertificateRequest, authz acl.Authorizer) (*structs.IssuedCert, error) {
// Note that only one spiffe id is allowed currently. If more than one is desired
// in future implmentations, then each ID should have authorization checks.
if len(csr.URIs) != 1 {
return nil, connect.InvalidCSRError("CSR SAN contains an invalid number of URIs: %v", len(csr.URIs))
}
if len(csr.EmailAddresses) > 0 {
return nil, connect.InvalidCSRError("CSR SAN does not allow specifying email addresses")
}
// Parse the SPIFFE ID from the CSR SAN.
spiffeID, err := connect.ParseCertURI(csr.URIs[0])
if err != nil {
return nil, err
}
// Perform authorization.
var authzContext acl.AuthorizerContext
allow := authz.ToAllowAuthorizer()
switch v := spiffeID.(type) {
case *connect.SpiffeIDService:
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.ServiceWriteAllowed(v.Service, &authzContext); err != nil {
return nil, err
}
// Verify that the DC in the service URI matches us. We might relax this
// requirement later but being restrictive for now is safer.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
case *connect.SpiffeIDAgent:
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.NodeWriteAllowed(v.Agent, &authzContext); err != nil {
return nil, err
}
case *connect.SpiffeIDMeshGateway:
// TODO(peering): figure out what is appropriate here for ACLs
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.MeshWriteAllowed(&authzContext); err != nil {
return nil, err
}
// Verify that the DC in the gateway URI matches us. We might relax this
// requirement later but being restrictive for now is safer.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
case *connect.SpiffeIDServer:
// The authorizer passed in should have unlimited permissions.
if err := allow.ACLWriteAllowed(&authzContext); err != nil {
return nil, err
}
// Verify that the DC in the URI matches us.
// The request must have been issued by a local server.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
default:
return nil, connect.InvalidCSRError("SPIFFE ID in CSR must be a service, mesh-gateway, or agent ID")
}
return c.SignCertificate(csr, spiffeID)
}
func (c *CAManager) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) {
provider, caRoot := c.getCAProvider()
if provider == nil {
return nil, fmt.Errorf("CA is uninitialized and unable to sign certificates yet: provider is nil")
} else if caRoot == nil {
return nil, fmt.Errorf("CA is uninitialized and unable to sign certificates yet: no root certificate")
}
// Verify that the CSR entity is in the cluster's trust domain
state := c.delegate.State()
_, config, err := state.CAConfig(nil)
if err != nil {
return nil, err
}
signingID := connect.SpiffeIDSigningForCluster(config.ClusterID)
serviceID, isService := spiffeID.(*connect.SpiffeIDService)
agentID, isAgent := spiffeID.(*connect.SpiffeIDAgent)
serverID, isServer := spiffeID.(*connect.SpiffeIDServer)
mgwID, isMeshGateway := spiffeID.(*connect.SpiffeIDMeshGateway)
var entMeta acl.EnterpriseMeta
switch {
case isService:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", serviceID.Host, signingID.Host())
}
entMeta.Merge(serviceID.GetEnterpriseMeta())
case isMeshGateway:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", mgwID.Host, signingID.Host())
}
entMeta.Merge(mgwID.GetEnterpriseMeta())
case isServer:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", serverID.Host, signingID.Host())
}
entMeta.Normalize()
case isAgent:
// isAgent - if we support more ID types then this would need to be an else if
// here we are just automatically fixing the trust domain. For auto-encrypt and
// auto-config they make certificate requests before learning about the roots
// so they will have a dummy trust domain in the CSR.
trustDomain := signingID.Host()
if agentID.Host != trustDomain {
originalURI := agentID.URI()
agentID.Host = trustDomain
// recreate the URIs list
uris := make([]*url.URL, len(csr.URIs))
for i, uri := range csr.URIs {
if originalURI.String() == uri.String() {
uris[i] = agentID.URI()
} else {
uris[i] = uri
}
}
csr.URIs = uris
}
entMeta.Merge(agentID.GetEnterpriseMeta())
default:
return nil, connect.InvalidCSRError("SPIFFE ID in CSR must be a service, agent, server, or mesh gateway ID")
}
commonCfg, err := config.GetCommonConfig()
if err != nil {
return nil, err
}
if commonCfg.CSRMaxPerSecond > 0 {
lim := c.caLeafLimiter.getCSRRateLimiterWithLimit(rate.Limit(commonCfg.CSRMaxPerSecond))
// Wait up to the small threshold we allow for a token.
ctx, cancel := context.WithTimeout(context.Background(), csrLimitWait)
defer cancel()
if lim.Wait(ctx) != nil {
return nil, ErrRateLimited
}
} else if commonCfg.CSRMaxConcurrent > 0 {
c.caLeafLimiter.csrConcurrencyLimiter.SetSize(int64(commonCfg.CSRMaxConcurrent))
ctx, cancel := context.WithTimeout(context.Background(), csrLimitWait)
defer cancel()
if err := c.caLeafLimiter.csrConcurrencyLimiter.Acquire(ctx); err != nil {
return nil, ErrRateLimited
}
defer c.caLeafLimiter.csrConcurrencyLimiter.Release()
}
connect.HackSANExtensionForCSR(csr)
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
// Check if the root expired before using it to sign.
// TODO: we store NotBefore and NotAfter on this struct, so we could avoid
// parsing the cert here.
err = c.checkExpired(caRoot.RootCert)
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
if err != nil {
return nil, fmt.Errorf("root expired: %w", err)
}
if c.isIntermediateUsedToSignLeaf() && len(caRoot.IntermediateCerts) > 0 {
inter := caRoot.IntermediateCerts[len(caRoot.IntermediateCerts)-1]
if err := c.checkExpired(inter); err != nil {
return nil, fmt.Errorf("intermediate expired: %w", err)
}
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
}
// All seems to be in order, actually sign it.
pem, err := provider.Sign(csr)
if err == ca.ErrRateLimited {
return nil, ErrRateLimited
}
if err != nil {
return nil, err
}
// Append any intermediates needed by this root.
for _, p := range caRoot.IntermediateCerts {
pem = pem + lib.EnsureTrailingNewline(p)
}
modIdx, err := c.delegate.ApplyCALeafRequest()
if err != nil {
return nil, err
}
cert, err := connect.ParseCert(pem)
if err != nil {
return nil, err
}
// Set the response
reply := structs.IssuedCert{
SerialNumber: connect.EncodeSerialNumber(cert.SerialNumber),
CertPEM: pem,
ValidAfter: cert.NotBefore,
ValidBefore: cert.NotAfter,
EnterpriseMeta: entMeta,
RaftIndex: structs.RaftIndex{
ModifyIndex: modIdx,
CreateIndex: modIdx,
},
}
switch {
case isService:
reply.Service = serviceID.Service
reply.ServiceURI = cert.URIs[0].String()
case isMeshGateway:
reply.Kind = structs.ServiceKindMeshGateway
reply.KindURI = cert.URIs[0].String()
case isAgent:
reply.Agent = agentID.Agent
reply.AgentURI = cert.URIs[0].String()
case isServer:
reply.ServerURI = cert.URIs[0].String()
default:
return nil, errors.New("not possible")
}
return &reply, nil
}
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
func (c *CAManager) checkExpired(pem string) error {
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
cert, err := connect.ParseCert(pem)
if err != nil {
return err
}
if cert.NotAfter.Before(c.timeNow()) {
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
return fmt.Errorf("certificate expired, expiration date: %s ", cert.NotAfter.String())
}
return nil
}
func primaryUsesIntermediate(provider ca.Provider) bool {
_, ok := provider.(ca.PrimaryUsesIntermediate)
return ok
}
func (c *CAManager) isIntermediateUsedToSignLeaf() bool {
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
return true
}
provider, _ := c.getCAProvider()
return primaryUsesIntermediate(provider)
}
2022-04-14 20:55:10 +00:00
func providerPrettyName(provider string) string {
switch provider {
case "consul":
return "Consul"
case "vault":
return "Vault"
case "aws-pca":
return "Aws-Pca"
case "provider-name":
return "Provider-Name"
default:
return provider
}
}