open-consul/agent/consul/leader_connect_ca.go

1663 lines
55 KiB
Go
Raw Normal View History

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package consul
import (
"context"
"crypto/x509"
"errors"
"fmt"
"net/url"
"reflect"
"strings"
"sync"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-uuid"
"golang.org/x/time/rate"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/routine"
"github.com/hashicorp/consul/lib/semaphore"
)
type caState string
const (
caStateUninitialized caState = "UNINITIALIZED"
caStateInitializing caState = "INITIALIZING"
caStateInitialized caState = "INITIALIZED"
caStateRenewIntermediate caState = "RENEWING"
caStateReconfig caState = "RECONFIGURING"
)
// caServerDelegate is an interface for server operations for facilitating
// easier testing.
type caServerDelegate interface {
ca.ConsulProviderStateDelegate
State() *state.Store
IsLeader() bool
ApplyCALeafRequest() (uint64, error)
forwardDC(method, dc string, args interface{}, reply interface{}) error
generateCASignRequest(csr string) *structs.CASignRequest
ServersSupportMultiDCConnectCA() error
}
// CAManager is a wrapper around CA operations such as updating roots, an intermediate
// or the configuration. All operations should go through the CAManager in order to
// avoid data races.
type CAManager struct {
delegate caServerDelegate
serverConf *Config
logger hclog.Logger
// rate limiter to use when signing leaf certificates
caLeafLimiter connectSignRateLimiter
providerLock sync.RWMutex
// provider is the current CA provider in use for Connect. This is
// only non-nil when we are the leader.
provider ca.Provider
// providerRoot is the CARoot that was stored along with the ca.Provider
// active. It's only updated in lock-step with the provider. This prevents
// races between state updates to active roots and the fetch of the provider
// instance.
providerRoot *structs.CARoot
// stateLock protects the internal state used for administrative CA tasks.
stateLock sync.Mutex
state caState
primaryRoots structs.IndexedCARoots // The most recently seen state of the root CAs from the primary datacenter.
leaderRoutineManager *routine.Manager
// providerShim is used to test CAManager with a fake provider.
providerShim ca.Provider
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
// shim time.Now for testing
timeNow func() time.Time
}
type caDelegateWithState struct {
*Server
}
func (c *caDelegateWithState) State() *state.Store {
return c.fsm.State()
}
func (c *caDelegateWithState) ApplyCARequest(req *structs.CARequest) (interface{}, error) {
return c.Server.raftApplyMsgpack(structs.ConnectCARequestType, req)
}
func (c *caDelegateWithState) ApplyCALeafRequest() (uint64, error) {
// TODO(banks): when we implement IssuedCerts table we can use the insert to
// that as the raft index to return in response.
//
// UPDATE(mkeeler): The original implementation relied on updating the CAConfig
// and using its index as the ModifyIndex for certs. This was buggy. The long
// term goal is still to insert some metadata into raft about the certificates
// and use that raft index for the ModifyIndex. This is a partial step in that
// direction except that we only are setting an index and not storing the
// metadata.
req := structs.CALeafRequest{
Op: structs.CALeafOpIncrementIndex,
Datacenter: c.Server.config.Datacenter,
}
resp, err := c.Server.raftApplyMsgpack(structs.ConnectCALeafRequestType|structs.IgnoreUnknownTypeFlag, &req)
2021-07-12 14:37:25 +00:00
if err != nil {
return 0, err
}
modIdx, ok := resp.(uint64)
if !ok {
return 0, fmt.Errorf("Invalid response from updating the leaf cert index")
}
return modIdx, err
}
func (c *caDelegateWithState) generateCASignRequest(csr string) *structs.CASignRequest {
return &structs.CASignRequest{
Datacenter: c.Server.config.PrimaryDatacenter,
CSR: csr,
WriteRequest: structs.WriteRequest{Token: c.Server.tokens.ReplicationToken()},
}
}
func (c *caDelegateWithState) ServersSupportMultiDCConnectCA() error {
versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.Server, c.Server.config.PrimaryDatacenter, minMultiDCConnectVersion)
if !primaryFound {
return fmt.Errorf("primary datacenter is unreachable")
}
if !versionOk {
return fmt.Errorf("all servers in the primary datacenter are not at the minimum version %v", minMultiDCConnectVersion)
}
return nil
}
func (c *caDelegateWithState) ProviderState(id string) (*structs.CAConsulProviderState, error) {
_, s, err := c.fsm.State().CAProviderState(id)
return s, err
}
func NewCAManager(delegate caServerDelegate, leaderRoutineManager *routine.Manager, logger hclog.Logger, config *Config) *CAManager {
return &CAManager{
delegate: delegate,
logger: logger,
serverConf: config,
state: caStateUninitialized,
leaderRoutineManager: leaderRoutineManager,
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
timeNow: time.Now,
}
}
// setState attempts to update the CA state to the given state.
// Valid state transitions are:
//
// caStateInitialized -> <any state except caStateInitializing>
// caStateUninitialized -> caStateInitializing
// caStateUninitialized -> caStateReconfig
//
// Other state transitions may be forced if the validateState parameter is set to false.
// This will mainly be used in deferred functions which aim to set the final status based
// a successful/error return.
func (c *CAManager) setState(newState caState, validateState bool) (caState, error) {
c.stateLock.Lock()
defer c.stateLock.Unlock()
state := c.state
if !validateState ||
(state == caStateInitialized && newState != caStateInitializing) ||
(state == caStateUninitialized && newState == caStateInitializing) ||
(state == caStateUninitialized && newState == caStateReconfig) {
c.state = newState
} else {
return state, &caStateError{Current: state}
}
return state, nil
}
type caStateError struct {
Current caState
}
func (e *caStateError) Error() string {
return fmt.Sprintf("CA is already in state %q", e.Current)
}
// secondarySetPrimaryRoots updates the most recently seen roots from the primary.
func (c *CAManager) secondarySetPrimaryRoots(newRoots structs.IndexedCARoots) {
// TODO: this could be a different lock, as long as its the same lock in secondaryGetPrimaryRoots
c.stateLock.Lock()
defer c.stateLock.Unlock()
c.primaryRoots = newRoots
}
func (c *CAManager) secondaryGetActivePrimaryCARoot() (*structs.CARoot, error) {
// TODO: this could be a different lock, as long as its the same lock in secondarySetPrimaryRoots
c.stateLock.Lock()
primaryRoots := c.primaryRoots
c.stateLock.Unlock()
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
return root, nil
}
}
return nil, fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
// initializeCAConfig is used to initialize the CA config if necessary
// when setting up the CA during establishLeadership. The state should be set to
// non-ready before calling this.
func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) {
st := c.delegate.State()
_, config, err := st.CAConfig(nil)
if err != nil {
return nil, err
}
if config == nil {
config = c.serverConf.CAConfig
if c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter && config.ClusterID == "" {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
config.ClusterID = id
}
} else if _, ok := config.Config["IntermediateCertTTL"]; !ok {
dup := *config
copied := make(map[string]interface{})
for k, v := range dup.Config {
copied[k] = v
}
copied["IntermediateCertTTL"] = connect.DefaultIntermediateCertTTL.String()
dup.Config = copied
config = &dup
} else {
return config, nil
}
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: config,
}
if _, err := c.delegate.ApplyCARequest(&req); err != nil {
return nil, err
}
return config, nil
}
// newCARoot returns a filled-in structs.CARoot from a raw PEM value.
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
func newCARoot(rootResult ca.CAChainResult, provider, clusterID string) (*structs.CARoot, error) {
primaryCert, err := connect.ParseCert(rootResult.PEM)
if err != nil {
return nil, err
}
keyType, keyBits, err := connect.KeyInfoFromCert(primaryCert)
if err != nil {
return nil, fmt.Errorf("error extracting root key info: %v", err)
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
caRoot := &structs.CARoot{
ID: connect.CalculateCertFingerprint(primaryCert.Raw),
2022-04-14 20:55:10 +00:00
Name: fmt.Sprintf("%s CA Primary Cert", providerPrettyName(provider)),
SerialNumber: primaryCert.SerialNumber.Uint64(),
SigningKeyID: connect.EncodeSigningKeyID(primaryCert.SubjectKeyId),
ExternalTrustDomain: clusterID,
NotBefore: primaryCert.NotBefore,
NotAfter: primaryCert.NotAfter,
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
RootCert: lib.EnsureTrailingNewline(rootResult.PEM),
PrivateKeyType: keyType,
PrivateKeyBits: keyBits,
Active: true,
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
}
if rootResult.IntermediatePEM == "" {
return caRoot, nil
}
if err := setLeafSigningCert(caRoot, rootResult.IntermediatePEM); err != nil {
return nil, fmt.Errorf("error setting leaf signing cert: %w", err)
}
return caRoot, nil
}
// getCAProvider returns the currently active instance of the CA Provider,
// as well as the active root.
func (c *CAManager) getCAProvider() (ca.Provider, *structs.CARoot) {
retries := 0
for {
c.providerLock.RLock()
result := c.provider
resultRoot := c.providerRoot
c.providerLock.RUnlock()
// In cases where an agent is started with managed proxies, we may ask
// for the provider before establishLeadership completes. If we're the
// leader, then wait and get the provider again
if result == nil && c.delegate.IsLeader() && retries < 10 {
retries++
time.Sleep(50 * time.Millisecond)
continue
}
return result, resultRoot
}
}
// setCAProvider is being called while holding the stateLock
// which means it must never take that lock itself or call anything that does.
func (c *CAManager) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
c.providerLock.Lock()
c.provider = newProvider
c.providerRoot = root
c.providerLock.Unlock()
}
func (c *CAManager) Start(ctx context.Context) {
// Attempt to initialize the Connect CA now. This will
// happen during leader establishment and it would be great
// if the CA was ready to go once that process was finished.
if err := c.Initialize(); err != nil {
c.logger.Error("Failed to initialize Connect CA", "error", err)
// we failed to fully initialize the CA so we need to spawn a
// go routine to retry this process until it succeeds or we lose
// leadership and the go routine gets stopped.
c.leaderRoutineManager.Start(ctx, backgroundCAInitializationRoutineName, c.backgroundCAInitialization)
} else {
// We only start these if CA initialization was successful. If not the completion of the
// background CA initialization will start these routines.
c.startPostInitializeRoutines(ctx)
}
}
func (c *CAManager) Stop() {
c.leaderRoutineManager.Stop(secondaryCARootWatchRoutineName)
c.leaderRoutineManager.Stop(intermediateCertRenewWatchRoutineName)
c.leaderRoutineManager.Stop(backgroundCAInitializationRoutineName)
if provider, _ := c.getCAProvider(); provider != nil {
if needsStop, ok := provider.(ca.NeedsStop); ok {
needsStop.Stop()
}
}
c.setState(caStateUninitialized, false)
c.primaryRoots = structs.IndexedCARoots{}
c.setCAProvider(nil, nil)
}
func (c *CAManager) startPostInitializeRoutines(ctx context.Context) {
// Start the Connect secondary DC actions if enabled.
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
c.leaderRoutineManager.Start(ctx, secondaryCARootWatchRoutineName, c.secondaryCARootWatch)
}
c.leaderRoutineManager.Start(ctx, intermediateCertRenewWatchRoutineName, c.runRenewIntermediate)
}
func (c *CAManager) backgroundCAInitialization(ctx context.Context) error {
retryLoopBackoffAbortOnSuccess(ctx, c.Initialize, func(err error) {
c.logger.Error("Failed to initialize Connect CA",
"routine", backgroundCAInitializationRoutineName,
"error", err,
)
})
if err := ctx.Err(); err != nil {
return err
}
c.logger.Info("Successfully initialized the Connect CA")
c.startPostInitializeRoutines(ctx)
return nil
}
// Initialize sets up the CA provider when gaining leadership, either bootstrapping
// the CA if this is the primary DC or making a remote RPC for intermediate signing
// if this is a secondary DC.
func (c *CAManager) Initialize() (reterr error) {
// Bail if connect isn't enabled.
if !c.serverConf.ConnectEnabled {
return nil
}
// Update the state before doing anything else.
_, err := c.setState(caStateInitializing, true)
var errCaState *caStateError
switch {
case errors.As(err, &errCaState) && errCaState.Current == caStateInitialized:
return nil
case err != nil:
return err
}
defer func() {
// Using named return values in deferred funcs isnt too common in our code
// but it is first class Go functionality. The error erturned from the
// main func will be available by its given name within deferred functions.
// See: https://blog.golang.org/defer-panic-and-recover
if reterr == nil {
c.setState(caStateInitialized, false)
} else {
c.setState(caStateUninitialized, false)
}
}()
// Initialize the provider based on the current config.
conf, err := c.initializeCAConfig()
if err != nil {
return err
}
provider, err := c.newProvider(conf)
if err != nil {
return err
}
c.setCAProvider(provider, nil)
if c.serverConf.PrimaryDatacenter == c.serverConf.Datacenter {
return c.primaryInitialize(provider, conf)
}
return c.secondaryInitialize(provider, conf)
}
func (c *CAManager) secondaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error {
if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil {
return fmt.Errorf("initialization will be deferred: %w", err)
}
// Get the root CA to see if we need to refresh our intermediate.
args := structs.DCSpecificRequest{
Datacenter: c.serverConf.PrimaryDatacenter,
}
var roots structs.IndexedCARoots
if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("failed to get CA roots from primary DC: %w", err)
}
c.secondarySetPrimaryRoots(roots)
// Configure the CA provider and initialize the intermediate certificate if necessary.
if err := c.secondaryInitializeProvider(provider, roots); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return err
}
c.logger.Info("initialized secondary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// createProvider returns a connect CA provider from the given config.
func (c *CAManager) newProvider(conf *structs.CAConfiguration) (ca.Provider, error) {
logger := c.logger.Named(conf.Provider)
switch conf.Provider {
case structs.ConsulCAProvider:
return ca.NewConsulProvider(c.delegate, logger), nil
case structs.VaultCAProvider:
return ca.NewVaultProvider(logger), nil
case structs.AWSCAProvider:
return ca.NewAWSProvider(logger), nil
default:
if c.providerShim != nil {
return c.providerShim, nil
}
return nil, fmt.Errorf("unknown CA provider %q", conf.Provider)
}
}
// primaryInitialize runs the initialization logic for a root CA. It should only
// be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) primaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error {
pCfg := ca.ProviderConfig{
ClusterID: conf.ClusterID,
Datacenter: c.serverConf.Datacenter,
IsPrimary: true,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
// If the provider has state to persist and it's changed or new then update
// CAConfig.
pState, err := provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
if !reflect.DeepEqual(conf.State, pState) {
// Update the CAConfig in raft to persist the provider state
conf.State = pState
req := structs.CARequest{
Op: structs.CAOpSetConfig,
Config: conf,
}
if _, err = c.delegate.ApplyCARequest(&req); err != nil {
return fmt.Errorf("error persisting provider state: %v", err)
}
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
root, err := provider.GenerateCAChain()
if err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
rootCA, err := newCARoot(root, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
var rootUpdateRequired bool
if len(rootCA.IntermediateCerts) > 0 {
rootUpdateRequired = true
}
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := c.delegate.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
if activeRoot != nil && !rootUpdateRequired {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}
// TODO: why doesn't this c.setCAProvider(provider, activeRoot) ?
rootCA.IntermediateCerts = activeRoot.IntermediateCerts
c.setCAProvider(provider, rootCA)
c.logger.Info("initialized primary datacenter CA from existing CARoot with provider", "provider", conf.Provider)
return nil
}
if err := c.persistNewRootAndConfig(provider, rootCA, conf); err != nil {
return err
}
c.setCAProvider(provider, rootCA)
c.logger.Info("initialized primary datacenter CA with provider", "provider", conf.Provider)
return nil
}
// getLeafSigningCertFromRoot returns the PEM encoded certificate that should be used to
// sign leaf certificates in the local datacenter. The SubjectKeyId of the
// returned cert should always match the SigningKeyID of the CARoot.
//
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
// TODO: only used in tests. Remove if possible.
func (c *CAManager) getLeafSigningCertFromRoot(root *structs.CARoot) string {
if len(root.IntermediateCerts) == 0 {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
return root.RootCert
}
return root.IntermediateCerts[len(root.IntermediateCerts)-1]
}
// secondaryInitializeIntermediateCA generates a Certificate Signing Request (CSR)
// for the intermediate CA that is used to sign leaf certificates in the secondary.
// The CSR is signed by the primary DC and then persisted in the state store.
//
// This method should only be called while the state lock is held by setting the
// state to non-ready.
func (c *CAManager) secondaryInitializeIntermediateCA(provider ca.Provider, config *structs.CAConfiguration) error {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
activeIntermediate, err := provider.ActiveLeafSigningCert()
if err != nil {
return err
}
_, activeRoot, err := c.delegate.State().CARootActive(nil)
if err != nil {
return err
}
var currentSigningKeyID string
if activeRoot != nil {
currentSigningKeyID = activeRoot.SigningKeyID
}
var expectedSigningKeyID string
if activeIntermediate != "" {
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
}
newActiveRoot, err := c.secondaryGetActivePrimaryCARoot()
if err != nil {
return err
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := activeIntermediate == ""
if activeRoot != nil && newActiveRoot.ID != activeRoot.ID {
needsNewIntermediate = true
}
// Also we take this opportunity to correct an incorrectly persisted SigningKeyID
// in secondary datacenters (see PR-6513).
if expectedSigningKeyID != "" && currentSigningKeyID != expectedSigningKeyID {
needsNewIntermediate = true
}
if needsNewIntermediate {
if err := c.secondaryRequestNewSigningCert(provider, newActiveRoot); err != nil {
return err
}
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeRoot
}
// Determine whether a root update is needed, and persist the roots/config accordingly.
var newRoot *structs.CARoot
if activeRoot == nil || needsNewIntermediate {
newRoot = newActiveRoot
}
if err := c.persistNewRootAndConfig(provider, newRoot, config); err != nil {
return err
}
c.setCAProvider(provider, newActiveRoot)
return nil
}
// persistNewRootAndConfig should only be called while the state lock is held
// by setting the state to non-ready.
// If newActiveRoot is non-nil, it will be appended to the current roots list.
// If config is non-nil, it will be used to overwrite the existing config.
func (c *CAManager) persistNewRootAndConfig(provider ca.Provider, newActiveRoot *structs.CARoot, config *structs.CAConfiguration) error {
state := c.delegate.State()
idx, oldRoots, err := state.CARoots(nil)
if err != nil {
return err
}
// Look up the existing CA config if a new one wasn't provided.
var newConf structs.CAConfiguration
_, storedConfig, err := state.CAConfig(nil)
if err != nil {
return err
}
if storedConfig == nil {
return fmt.Errorf("local CA not initialized yet")
}
// Exit early if the change is a no-op.
if !shouldPersistNewRootAndConfig(newActiveRoot, storedConfig, config) {
return nil
}
if config != nil {
newConf = *config
} else {
newConf = *storedConfig
}
// Update the trust domain for the config if there's a new root, or keep the old
// one if the root isn't being updated.
newConf.ModifyIndex = storedConfig.ModifyIndex
if newActiveRoot != nil {
newConf.ClusterID = newActiveRoot.ExternalTrustDomain
} else {
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
}
newConf.ClusterID = activeRoot.ExternalTrustDomain
}
// Persist any state the provider needs us to
newConf.State, err = provider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
// If there's a new active root, copy the root list and append it, updating
// the old root with the time it was rotated out.
var newRoots structs.CARoots
for _, r := range oldRoots {
newRoot := *r
if newRoot.Active && newActiveRoot != nil {
newRoot.Active = false
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
newRoot.RotatedOutAt = c.timeNow()
}
if newRoot.ExternalTrustDomain == "" {
newRoot.ExternalTrustDomain = newConf.ClusterID
}
newRoots = append(newRoots, &newRoot)
}
if newActiveRoot != nil {
newRoots = append(newRoots, newActiveRoot)
}
args := &structs.CARequest{
Op: structs.CAOpSetRootsAndConfig,
Index: idx,
Roots: newRoots,
Config: &newConf,
}
resp, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
c.logger.Info("updated root certificates from primary datacenter")
return nil
}
func shouldPersistNewRootAndConfig(newActiveRoot *structs.CARoot, oldConfig, newConfig *structs.CAConfiguration) bool {
if newActiveRoot != nil {
return true
}
if newConfig == nil {
return false
}
return newConfig.Provider == oldConfig.Provider && reflect.DeepEqual(newConfig.Config, oldConfig.Config)
}
func (c *CAManager) UpdateConfiguration(args *structs.CARequest) (reterr error) {
// Attempt to update the state first.
oldState, err := c.setState(caStateReconfig, true)
if err != nil {
return err
}
defer func() {
// Using named return values in deferred funcs isnt too common in our code
// but it is first class Go functionality. The error erturned from the
// main func will be available by its given name within deferred functions.
// See: https://blog.golang.org/defer-panic-and-recover
if reterr == nil {
c.setState(caStateInitialized, false)
} else {
c.setState(oldState, false)
}
}()
// Attempt to initialize the config if we failed to do so in Initialize for some reason
prevConfig, err := c.initializeCAConfig()
if err != nil {
return err
}
// Exit early if it's a no-op change
state := c.delegate.State()
_, config, err := state.CAConfig(nil)
if err != nil {
return err
}
// Don't allow state changes. Either it needs to be empty or the same to allow
// read-modify-write loops that don't touch the State field.
if len(args.Config.State) > 0 &&
!reflect.DeepEqual(args.Config.State, config.State) {
return ErrStateReadOnly
}
// Don't allow users to change the ClusterID.
args.Config.ClusterID = config.ClusterID
if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) {
return nil
}
// If the provider hasn't changed, we need to load the current Provider state
// so it can decide if it needs to change resources or not based on the config
// change.
if args.Config.Provider == config.Provider {
// Note this is a shallow copy since the State method doc requires the
// provider return a map that will not be further modified and should not
// modify the one we pass to Configure.
args.Config.State = config.State
}
// Create a new instance of the provider described by the config
// and get the current active root CA. This acts as a good validation
// of the config and makes sure the provider is functioning correctly
// before we commit any changes to Raft.
newProvider, err := c.newProvider(args.Config)
if err != nil {
return fmt.Errorf("could not initialize provider: %v", err)
}
pCfg := ca.ProviderConfig{
ClusterID: args.Config.ClusterID,
Datacenter: c.serverConf.Datacenter,
// This endpoint can be called in a secondary DC too so set this correctly.
IsPrimary: c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter,
RawConfig: args.Config.Config,
State: args.Config.State,
}
if args.Config.Provider == config.Provider {
if validator, ok := newProvider.(ValidateConfigUpdater); ok {
if err := validator.ValidateConfigUpdate(prevConfig.Config, args.Config.Config); err != nil {
return fmt.Errorf("new configuration is incompatible with previous configuration: %w", err)
}
}
}
if err := newProvider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
cleanupNewProvider := func() {
if err := newProvider.Cleanup(args.Config.Provider != config.Provider, args.Config.Config); err != nil {
c.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err)
}
}
// If this is a secondary, just check if the intermediate needs to be regenerated.
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
if err := c.secondaryInitializeIntermediateCA(newProvider, args.Config); err != nil {
cleanupNewProvider()
return fmt.Errorf("Error updating secondary datacenter CA config: %v", err)
}
c.logger.Info("Secondary CA provider config updated")
return nil
}
if err := c.primaryUpdateRootCA(newProvider, args, config); err != nil {
cleanupNewProvider()
return err
}
return nil
}
// ValidateConfigUpdater is an optional interface that may be implemented
// by a ca.Provider. If the provider implements this interface, the
// ValidateConfigurationUpdate will be called when a user attempts to change the
// CA configuration, and the provider type has not changed from the previous
// configuration.
type ValidateConfigUpdater interface {
// ValidateConfigUpdate should return an error if the next configuration is
// incompatible with the previous configuration.
//
// TODO: use better types after https://github.com/hashicorp/consul/issues/12238
ValidateConfigUpdate(previous, next map[string]interface{}) error
}
func (c *CAManager) primaryUpdateRootCA(newProvider ca.Provider, args *structs.CARequest, config *structs.CAConfiguration) error {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
// See if the provider needs to persist any state along with the config
pState, err := newProvider.State()
if err != nil {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
return fmt.Errorf("error getting provider state: %v", err)
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
args.Config.State = pState
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
providerRoot, err := newProvider.GenerateCAChain()
if err != nil {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
return fmt.Errorf("error generating CA root certificate: %v", err)
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
newRootPEM := providerRoot.PEM
newActiveRoot, err := newCARoot(providerRoot, args.Config.Provider, args.Config.ClusterID)
if err != nil {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
return err
}
state := c.delegate.State()
// Compare the new provider's root CA ID to the current one. If they
// match, just update the existing provider with the new config.
// If they don't match, begin the root rotation process.
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}
// If the root didn't change, just update the config and return.
if root != nil && root.ID == newActiveRoot.ID {
args.Op = structs.CAOpSetConfig
_, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
// If the config has been committed, update the local provider instance
c.setCAProvider(newProvider, newActiveRoot)
c.logger.Info("CA provider config updated")
return nil
}
// get the old CA provider to be used for Cross Signing and to clean it up at the end
// of the functi8on.
oldProvider, _ := c.getCAProvider()
if oldProvider == nil {
return fmt.Errorf("internal error: CA provider is nil")
}
// We only even think about cross signing if the current provider has a root cert
// In some cases such as having a bad CA configuration during startup the provider
// may not have been able to generate a cert. We then want to be able to prevent
// an attempt to cross sign the cert which will definitely fail.
if root != nil {
// If it's a config change that would trigger a rotation (different provider/root):
// 1. Get the root from the new provider.
// 2. Call CrossSignCA on the old provider to sign the new root with the old one to
// get a cross-signed certificate.
// 3. Take the active root for the new provider and append the intermediate from step 2
// to its list of intermediates.
// TODO: this cert is already parsed once in newCARoot, could we remove the second parse?
newRoot, err := connect.ParseCert(newRootPEM)
if err != nil {
return err
}
// At this point, we know the config change has triggered a root rotation,
// either by swapping the provider type or changing the provider's config
// to use a different root certificate.
// First up, check that the current provider actually supports
// cross-signing.
canXSign, err := oldProvider.SupportsCrossSigning()
if err != nil {
return fmt.Errorf("CA provider error: %s", err)
}
if !canXSign && !args.Config.ForceWithoutCrossSigning {
return errors.New("The current CA Provider does not support cross-signing. " +
"You can try again with ForceWithoutCrossSigningSet but this may cause " +
"disruption - see documentation for more.")
}
if args.Config.ForceWithoutCrossSigning {
c.logger.Warn("ForceWithoutCrossSigning set, CA reconfiguration skipping cross-signing")
}
// If ForceWithoutCrossSigning wasn't set, attempt to have the old CA generate a
// cross-signed intermediate.
if canXSign && !args.Config.ForceWithoutCrossSigning {
// Have the old provider cross-sign the new root
xcCert, err := oldProvider.CrossSignCA(newRoot)
if err != nil {
return err
}
// Add the cross signed cert to the new CA's intermediates (to be attached
// to leaf certs). We do not want it to be the last cert if there are any
// existing intermediate certs so we push to the front.
newActiveRoot.IntermediateCerts = append([]string{xcCert}, newActiveRoot.IntermediateCerts...)
}
}
// Update the roots and CA config in the state store at the same time
idx, roots, err := state.CARoots(nil)
if err != nil {
return err
}
var newRoots structs.CARoots
for _, r := range roots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
newRoot.RotatedOutAt = c.timeNow()
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)
args.Op = structs.CAOpSetRootsAndConfig
args.Index = idx
args.Config.ModifyIndex = config.ModifyIndex
args.Roots = newRoots
resp, err := c.delegate.ApplyCARequest(args)
if err != nil {
return err
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}
// If the config has been committed, update the local provider instance
// and call teardown on the old provider
c.setCAProvider(newProvider, newActiveRoot)
if err := oldProvider.Cleanup(args.Config.Provider != config.Provider, args.Config.Config); err != nil {
c.logger.Warn("failed to clean up old provider", "provider", config.Provider, "error", err)
}
c.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider)
return nil
}
// primaryRenewIntermediate regenerates the intermediate cert in the primary datacenter.
// This is only run for CAs that require an intermediary in the primary DC, such as Vault.
// It should only be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) primaryRenewIntermediate(provider ca.Provider, newActiveRoot *structs.CARoot) error {
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
p, ok := provider.(ca.PrimaryUsesIntermediate)
if !ok {
return fmt.Errorf("tried to renew intermediates for a provider that does not use them")
}
// Generate and sign an intermediate cert using the root CA.
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
intermediatePEM, err := p.GenerateLeafSigningCert()
if err != nil {
return fmt.Errorf("error generating new intermediate cert: %v", err)
}
if err := setLeafSigningCert(newActiveRoot, intermediatePEM); err != nil {
return err
}
c.logger.Info("generated new intermediate certificate for primary datacenter")
return nil
}
// secondaryRequestNewSigningCert creates a Certificate Signing Request, sends
// the request to the primary, and stores the received certificate in the
// provider.
// Should only be called while the state lock is held by setting the state to non-ready.
func (c *CAManager) secondaryRequestNewSigningCert(provider ca.Provider, newActiveRoot *structs.CARoot) error {
csr, opaque, err := provider.GenerateIntermediateCSR()
if err != nil {
return err
}
var intermediatePEM string
if err := c.delegate.forwardDC("ConnectCA.SignIntermediate", c.serverConf.PrimaryDatacenter, c.delegate.generateCASignRequest(csr), &intermediatePEM); err != nil {
// this is a failure in the primary and shouldn't be capable of erroring out our establishing leadership
c.logger.Warn("Primary datacenter refused to sign our intermediate CA certificate", "error", err)
return nil
}
if err := provider.SetIntermediate(intermediatePEM, newActiveRoot.RootCert, opaque); err != nil {
return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err)
}
if err := setLeafSigningCert(newActiveRoot, intermediatePEM); err != nil {
return fmt.Errorf("Failed to set the leaf signing cert to the intermediate: %w", err)
}
c.logger.Info("received new intermediate certificate from primary datacenter")
return nil
}
// setLeafSigningCert updates the CARoot by appending the pem to the list of
// intermediate certificates, and setting the SigningKeyID to the encoded
// SubjectKeyId of the certificate.
func setLeafSigningCert(caRoot *structs.CARoot, pem string) error {
cert, err := connect.ParseCert(pem)
if err != nil {
return fmt.Errorf("error parsing leaf signing cert: %w", err)
}
if err := pruneExpiredIntermediates(caRoot); err != nil {
return err
}
caRoot.IntermediateCerts = append(caRoot.IntermediateCerts, pem)
caRoot.SigningKeyID = connect.EncodeSigningKeyID(cert.SubjectKeyId)
return nil
}
// pruneExpiredIntermediates removes expired intermediate certificates
// from the given CARoot.
func pruneExpiredIntermediates(caRoot *structs.CARoot) error {
var newIntermediates []string
now := time.Now()
for _, intermediatePEM := range caRoot.IntermediateCerts {
cert, err := connect.ParseCert(intermediatePEM)
if err != nil {
return fmt.Errorf("error parsing leaf signing cert: %w", err)
}
// Only keep the intermediate cert if it's still valid.
if cert.NotAfter.After(now) {
newIntermediates = append(newIntermediates, intermediatePEM)
}
}
caRoot.IntermediateCerts = newIntermediates
return nil
}
// runRenewIntermediate periodically attempts to renew the intermediate cert.
func (c *CAManager) runRenewIntermediate(ctx context.Context) error {
for {
select {
case <-ctx.Done():
return nil
case <-time.After(structs.IntermediateCertRenewInterval):
retryLoopBackoffAbortOnSuccess(ctx, func() error {
return c.RenewIntermediate(ctx)
}, func(err error) {
c.logger.Error("error renewing intermediate certs",
"routine", intermediateCertRenewWatchRoutineName,
"error", err,
)
})
}
}
}
// RenewIntermediate checks the intermediate cert for
// expiration. If more than half the time a cert is valid has passed,
// it will try to renew it.
func (c *CAManager) RenewIntermediate(ctx context.Context) error {
return c.renewIntermediate(ctx, false)
}
func (c *CAManager) renewIntermediateNow(ctx context.Context) error {
return c.renewIntermediate(ctx, true)
}
func (c *CAManager) renewIntermediate(ctx context.Context, forceNow bool) error {
// Grab the 'lock' right away so the provider/config can't be changed out while we check
// the intermediate.
if _, err := c.setState(caStateRenewIntermediate, true); err != nil {
return err
}
defer c.setState(caStateInitialized, false)
isPrimary := c.serverConf.InPrimaryDatacenter()
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// If this isn't the primary, make sure the CA has been initialized.
if !isPrimary && !c.secondaryHasProviderRoots() {
return fmt.Errorf("secondary CA is not yet configured.")
}
state := c.delegate.State()
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}
activeRoot := root.Clone()
// If this is the primary, check if this is a provider that uses an intermediate cert. If
// it isn't, we don't need to check for a renewal.
if isPrimary && !primaryUsesIntermediate(provider) {
return nil
}
Connect CA Primary Provider refactor (#16749) * Rename Intermediate cert references to LeafSigningCert Within the Consul CA subsystem, the term "Intermediate" is confusing because the meaning changes depending on provider and datacenter (primary vs secondary). For example, when using the Consul CA the "ActiveIntermediate" may return the root certificate in a primary datacenter. At a high level, we are interested in knowing which CA is responsible for signing leaf certs, regardless of its position in a certificate chain. This rename makes the intent clearer. * Move provider state check earlier * Remove calls to GenerateLeafSigningCert GenerateLeafSigningCert (formerly known as GenerateIntermediate) is vestigial in non-Vault providers, as it simply returns the root certificate in primary datacenters. By folding Vault's intermediate cert logic into `GenerateRoot` we can encapsulate the intermediate cert handling within `newCARoot`. * Move GenerateLeafSigningCert out of PrimaryProvidder Now that the Vault Provider calls GenerateLeafSigningCert within GenerateRoot, we can remove the method from all other providers that never used it in a meaningful way. * Add test for IntermediatePEM * Rename GenerateRoot to GenerateCAChain "Root" was being overloaded in the Consul CA context, as different providers and configs resulted in a single root certificate or a chain originating from an external trusted CA. Since the Vault provider also generates intermediates, it seems more accurate to call this a CAChain.
2023-04-03 15:40:33 +00:00
activeIntermediate, err := provider.ActiveLeafSigningCert()
if err != nil {
return err
}
if activeIntermediate == "" {
return fmt.Errorf("datacenter doesn't have an active intermediate.")
}
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
if !forceNow {
if lessThanHalfTimePassed(c.timeNow(), intermediateCert.NotBefore, intermediateCert.NotAfter) {
return nil
}
}
// Enough time has passed, go ahead with getting a new intermediate.
renewalFunc := c.primaryRenewIntermediate
if !isPrimary {
renewalFunc = c.secondaryRequestNewSigningCert
}
if forceNow {
err := renewalFunc(provider, activeRoot)
if err != nil {
return err
}
} else {
errCh := make(chan error, 1)
go func() {
errCh <- renewalFunc(provider, activeRoot)
}()
// Wait for the renewal func to return or for the context to be canceled.
select {
case <-ctx.Done():
return ctx.Err()
case err := <-errCh:
if err != nil {
return err
}
}
}
if err := c.persistNewRootAndConfig(provider, activeRoot, nil); err != nil {
return err
}
c.setCAProvider(provider, activeRoot)
return nil
}
// secondaryCARootWatch maintains a blocking query to the primary datacenter's
// ConnectCA.Roots endpoint to monitor when it needs to request a new signed
// intermediate certificate.
func (c *CAManager) secondaryCARootWatch(ctx context.Context) error {
args := structs.DCSpecificRequest{
Datacenter: c.serverConf.PrimaryDatacenter,
QueryOptions: structs.QueryOptions{
// the maximum time the primary roots watch query can block before returning
MaxQueryTime: c.serverConf.MaxQueryTime,
},
}
c.logger.Debug("starting Connect CA root replication from primary datacenter", "primary", c.serverConf.PrimaryDatacenter)
retryLoopBackoff(ctx, func() error {
var roots structs.IndexedCARoots
if err := c.delegate.forwardDC("ConnectCA.Roots", c.serverConf.PrimaryDatacenter, &args, &roots); err != nil {
return fmt.Errorf("Error retrieving the primary datacenter's roots: %v", err)
}
// Return if the context has been canceled while waiting on the RPC.
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Attempt to update the roots using the returned data.
if err := c.secondaryUpdateRoots(roots); err != nil {
return err
}
args.QueryOptions.MinQueryIndex = nextIndexVal(args.QueryOptions.MinQueryIndex, roots.QueryMeta.Index)
return nil
}, func(err error) {
c.logger.Error("CA root replication failed, will retry",
"routine", secondaryCARootWatchRoutineName,
"error", err,
)
})
return nil
}
// secondaryUpdateRoots updates the cached roots from the primary and regenerates the intermediate
// certificate if necessary.
func (c *CAManager) secondaryUpdateRoots(roots structs.IndexedCARoots) error {
// Update the state first to claim the 'lock'.
if _, err := c.setState(caStateReconfig, true); err != nil {
return err
}
defer c.setState(caStateInitialized, false)
// Update the cached primary roots now that the lock is held.
c.secondarySetPrimaryRoots(roots)
provider, _ := c.getCAProvider()
if provider == nil {
// this happens when leadership is being revoked and this go routine will be stopped
return nil
}
// Run the secondary CA init routine to see if we need to request a new
// intermediate.
if c.secondaryHasProviderRoots() {
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
return nil
}
// Attempt to initialize now that we have updated roots. This is an optimization
// so that we don't have to wait for the Initialize retry backoff if we were
// waiting on roots from the primary to be able to complete initialization.
if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil {
return fmt.Errorf("failed to initialize while updating primary roots: %w", err)
}
if err := c.secondaryInitializeProvider(provider, roots); err != nil {
return fmt.Errorf("Failed to initialize secondary CA provider: %v", err)
}
if err := c.secondaryInitializeIntermediateCA(provider, nil); err != nil {
return fmt.Errorf("Failed to initialize the secondary CA: %v", err)
}
return nil
}
// secondaryInitializeProvider configures the given provider for a secondary, non-root datacenter.
func (c *CAManager) secondaryInitializeProvider(provider ca.Provider, roots structs.IndexedCARoots) error {
if roots.TrustDomain == "" {
return fmt.Errorf("trust domain from primary datacenter is not initialized")
}
clusterID := strings.Split(roots.TrustDomain, ".")[0]
_, conf, err := c.delegate.State().CAConfig(nil)
if err != nil {
return err
}
pCfg := ca.ProviderConfig{
ClusterID: clusterID,
Datacenter: c.serverConf.Datacenter,
IsPrimary: false,
RawConfig: conf.Config,
State: conf.State,
}
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
return nil
}
// secondaryHasProviderRoots returns true after providerRoot has been set. This
// method is used to detect when the secondary has received the roots from the
// primary DC.
func (c *CAManager) secondaryHasProviderRoots() bool {
// TODO: this could potentially also use primaryRoots instead of providerRoot
c.providerLock.Lock()
defer c.providerLock.Unlock()
return c.providerRoot != nil
}
type connectSignRateLimiter struct {
// csrRateLimiter limits the rate of signing new certs if configured. Lazily
// initialized from current config to support dynamic changes.
// csrRateLimiterMu must be held while dereferencing the pointer or storing a
// new one, but methods can be called on the limiter object outside of the
// locked section. This is done only in the getCSRRateLimiterWithLimit method.
csrRateLimiter *rate.Limiter
csrRateLimiterMu sync.RWMutex
// csrConcurrencyLimiter is a dynamically resizable semaphore used to limit
// Sign RPC concurrency if configured. The zero value is usable as soon as
// SetSize is called which we do dynamically in the RPC handler to avoid
// having to hook elaborate synchronization mechanisms through the CA config
// endpoint and config reload etc.
csrConcurrencyLimiter semaphore.Dynamic
}
// getCSRRateLimiterWithLimit returns a rate.Limiter with the desired limit set.
// It uses the shared server-wide limiter unless the limit has been changed in
// config or the limiter has not been setup yet in which case it just-in-time
// configures the new limiter. We assume that limit changes are relatively rare
// and that all callers (there is currently only one) use the same config value
// as the limit. There might be some flapping if there are multiple concurrent
// requests in flight at the time the config changes where A sees the new value
// and updates, B sees the old but then gets this lock second and changes back.
// Eventually though and very soon (once all current RPCs are complete) we are
// guaranteed to have the correct limit set by the next RPC that comes in so I
// assume this is fine. If we observe strange behavior because of it, we could
// add hysteresis that prevents changes too soon after a previous change but
// that seems unnecessary for now.
func (l *connectSignRateLimiter) getCSRRateLimiterWithLimit(limit rate.Limit) *rate.Limiter {
l.csrRateLimiterMu.RLock()
lim := l.csrRateLimiter
l.csrRateLimiterMu.RUnlock()
// If there is a current limiter with the same limit, return it. This should
// be the common case.
if lim != nil && lim.Limit() == limit {
return lim
}
// Need to change limiter, get write lock
l.csrRateLimiterMu.Lock()
defer l.csrRateLimiterMu.Unlock()
// No limiter yet, or limit changed in CA config, reconfigure a new limiter.
// We use burst of 1 for a hard limit. Note that either bursting or waiting is
// necessary to get expected behavior in fact of random arrival times, but we
// don't need both and we use Wait with a small delay to smooth noise. See
// https://github.com/banks/sim-rate-limit-backoff/blob/master/README.md.
l.csrRateLimiter = rate.NewLimiter(limit, 1)
return l.csrRateLimiter
}
// AuthorizeAndSignCertificate signs a leaf certificate for the service or agent
// identified by the SPIFFE ID in the given CSR's SAN. It performs authorization
// using the given acl.Authorizer.
func (c *CAManager) AuthorizeAndSignCertificate(csr *x509.CertificateRequest, authz acl.Authorizer) (*structs.IssuedCert, error) {
// Note that only one spiffe id is allowed currently. If more than one is desired
// in future implmentations, then each ID should have authorization checks.
if len(csr.URIs) != 1 {
return nil, connect.InvalidCSRError("CSR SAN contains an invalid number of URIs: %v", len(csr.URIs))
}
if len(csr.EmailAddresses) > 0 {
return nil, connect.InvalidCSRError("CSR SAN does not allow specifying email addresses")
}
// Parse the SPIFFE ID from the CSR SAN.
spiffeID, err := connect.ParseCertURI(csr.URIs[0])
if err != nil {
return nil, err
}
// Perform authorization.
var authzContext acl.AuthorizerContext
allow := authz.ToAllowAuthorizer()
switch v := spiffeID.(type) {
case *connect.SpiffeIDService:
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.ServiceWriteAllowed(v.Service, &authzContext); err != nil {
return nil, err
}
// Verify that the DC in the service URI matches us. We might relax this
// requirement later but being restrictive for now is safer.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
case *connect.SpiffeIDAgent:
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.NodeWriteAllowed(v.Agent, &authzContext); err != nil {
return nil, err
}
case *connect.SpiffeIDMeshGateway:
// TODO(peering): figure out what is appropriate here for ACLs
v.GetEnterpriseMeta().FillAuthzContext(&authzContext)
if err := allow.MeshWriteAllowed(&authzContext); err != nil {
return nil, err
}
// Verify that the DC in the gateway URI matches us. We might relax this
// requirement later but being restrictive for now is safer.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
case *connect.SpiffeIDServer:
// The authorizer passed in should have unlimited permissions.
if err := allow.ACLWriteAllowed(&authzContext); err != nil {
return nil, err
}
// Verify that the DC in the URI matches us.
// The request must have been issued by a local server.
dc := c.serverConf.Datacenter
if v.Datacenter != dc {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different datacenter: %s, "+
"we are %s", v.Datacenter, dc)
}
default:
return nil, connect.InvalidCSRError("SPIFFE ID in CSR must be a service, mesh-gateway, or agent ID")
}
return c.SignCertificate(csr, spiffeID)
}
func (c *CAManager) SignCertificate(csr *x509.CertificateRequest, spiffeID connect.CertURI) (*structs.IssuedCert, error) {
provider, caRoot := c.getCAProvider()
if provider == nil {
return nil, fmt.Errorf("CA is uninitialized and unable to sign certificates yet: provider is nil")
} else if caRoot == nil {
return nil, fmt.Errorf("CA is uninitialized and unable to sign certificates yet: no root certificate")
}
// Verify that the CSR entity is in the cluster's trust domain
state := c.delegate.State()
_, config, err := state.CAConfig(nil)
if err != nil {
return nil, err
}
signingID := connect.SpiffeIDSigningForCluster(config.ClusterID)
serviceID, isService := spiffeID.(*connect.SpiffeIDService)
agentID, isAgent := spiffeID.(*connect.SpiffeIDAgent)
serverID, isServer := spiffeID.(*connect.SpiffeIDServer)
mgwID, isMeshGateway := spiffeID.(*connect.SpiffeIDMeshGateway)
var entMeta acl.EnterpriseMeta
switch {
case isService:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", serviceID.Host, signingID.Host())
}
entMeta.Merge(serviceID.GetEnterpriseMeta())
case isMeshGateway:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", mgwID.Host, signingID.Host())
}
entMeta.Merge(mgwID.GetEnterpriseMeta())
case isServer:
if !signingID.CanSign(spiffeID) {
return nil, connect.InvalidCSRError("SPIFFE ID in CSR from a different trust domain: %s, "+
"we are %s", serverID.Host, signingID.Host())
}
entMeta.Normalize()
case isAgent:
// isAgent - if we support more ID types then this would need to be an else if
// here we are just automatically fixing the trust domain. For auto-encrypt and
// auto-config they make certificate requests before learning about the roots
// so they will have a dummy trust domain in the CSR.
trustDomain := signingID.Host()
if agentID.Host != trustDomain {
originalURI := agentID.URI()
agentID.Host = trustDomain
// recreate the URIs list
uris := make([]*url.URL, len(csr.URIs))
for i, uri := range csr.URIs {
if originalURI.String() == uri.String() {
uris[i] = agentID.URI()
} else {
uris[i] = uri
}
}
csr.URIs = uris
}
entMeta.Merge(agentID.GetEnterpriseMeta())
default:
return nil, connect.InvalidCSRError("SPIFFE ID in CSR must be a service, agent, server, or mesh gateway ID")
}
commonCfg, err := config.GetCommonConfig()
if err != nil {
return nil, err
}
if commonCfg.CSRMaxPerSecond > 0 {
lim := c.caLeafLimiter.getCSRRateLimiterWithLimit(rate.Limit(commonCfg.CSRMaxPerSecond))
// Wait up to the small threshold we allow for a token.
ctx, cancel := context.WithTimeout(context.Background(), csrLimitWait)
defer cancel()
if lim.Wait(ctx) != nil {
return nil, ErrRateLimited
}
} else if commonCfg.CSRMaxConcurrent > 0 {
c.caLeafLimiter.csrConcurrencyLimiter.SetSize(int64(commonCfg.CSRMaxConcurrent))
ctx, cancel := context.WithTimeout(context.Background(), csrLimitWait)
defer cancel()
if err := c.caLeafLimiter.csrConcurrencyLimiter.Acquire(ctx); err != nil {
return nil, ErrRateLimited
}
defer c.caLeafLimiter.csrConcurrencyLimiter.Release()
}
connect.HackSANExtensionForCSR(csr)
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
// Check if the root expired before using it to sign.
// TODO: we store NotBefore and NotAfter on this struct, so we could avoid
// parsing the cert here.
err = c.checkExpired(caRoot.RootCert)
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
if err != nil {
return nil, fmt.Errorf("root expired: %w", err)
}
if c.isIntermediateUsedToSignLeaf() && len(caRoot.IntermediateCerts) > 0 {
inter := caRoot.IntermediateCerts[len(caRoot.IntermediateCerts)-1]
if err := c.checkExpired(inter); err != nil {
return nil, fmt.Errorf("intermediate expired: %w", err)
}
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
}
// All seems to be in order, actually sign it.
pem, err := provider.Sign(csr)
if err == ca.ErrRateLimited {
return nil, ErrRateLimited
}
if err != nil {
return nil, err
}
// Append any intermediates needed by this root.
for _, p := range caRoot.IntermediateCerts {
pem = pem + lib.EnsureTrailingNewline(p)
}
modIdx, err := c.delegate.ApplyCALeafRequest()
if err != nil {
return nil, err
}
cert, err := connect.ParseCert(pem)
if err != nil {
return nil, err
}
// Set the response
reply := structs.IssuedCert{
SerialNumber: connect.EncodeSerialNumber(cert.SerialNumber),
CertPEM: pem,
ValidAfter: cert.NotBefore,
ValidBefore: cert.NotAfter,
EnterpriseMeta: entMeta,
RaftIndex: structs.RaftIndex{
ModifyIndex: modIdx,
CreateIndex: modIdx,
},
}
switch {
case isService:
reply.Service = serviceID.Service
reply.ServiceURI = cert.URIs[0].String()
case isMeshGateway:
reply.Kind = structs.ServiceKindMeshGateway
reply.KindURI = cert.URIs[0].String()
case isAgent:
reply.Agent = agentID.Agent
reply.AgentURI = cert.URIs[0].String()
case isServer:
reply.ServerURI = cert.URIs[0].String()
default:
return nil, errors.New("not possible")
}
return &reply, nil
}
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
func (c *CAManager) checkExpired(pem string) error {
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
cert, err := connect.ParseCert(pem)
if err != nil {
return err
}
if cert.NotAfter.Before(c.timeNow()) {
check expiry date of the root/intermediate before using it to sign a leaf (#10500) * ca: move provider creation into CAManager This further decouples the CAManager from Server. It reduces the interface between them and removes the need for the SetLogger method on providers. * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * ca: move SignCertificate to the file where it is used * auto-config: move autoConfigBackend impl off of Server Most of these methods are used exclusively for the AutoConfig RPC endpoint. This PR uses a pattern that we've used in other places as an incremental step to reducing the scope of Server. * fix linter issues * check error when `raftApplyMsgpack` * ca: move SignCertificate to CAManager To reduce the scope of Server, and keep all the CA logic together * check expiry date of the intermediate before using it to sign a leaf * fix typo in comment Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> * Fix test name * do not check cert start date * wrap error to mention it is the intermediate expired * Fix failing test * update comment Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> * use shim to avoid sleep in test * add root cert validation * remove duplicate code * Revert "fix linter issues" This reverts commit 6356302b54f06c8f2dee8e59740409d49e84ef24. * fix import issue * gofmt leader_connect_ca * add changelog entry * update error message Co-authored-by: Freddy <freddygv@users.noreply.github.com> * fix error message in test Co-authored-by: Daniel Nephin <dnephin@hashicorp.com> Co-authored-by: Kyle Havlovitz <kylehav@gmail.com> Co-authored-by: Freddy <freddygv@users.noreply.github.com>
2021-07-13 16:15:06 +00:00
return fmt.Errorf("certificate expired, expiration date: %s ", cert.NotAfter.String())
}
return nil
}
func primaryUsesIntermediate(provider ca.Provider) bool {
_, ok := provider.(ca.PrimaryUsesIntermediate)
return ok
}
func (c *CAManager) isIntermediateUsedToSignLeaf() bool {
if c.serverConf.Datacenter != c.serverConf.PrimaryDatacenter {
return true
}
provider, _ := c.getCAProvider()
return primaryUsesIntermediate(provider)
}
2022-04-14 20:55:10 +00:00
func providerPrettyName(provider string) string {
switch provider {
case "consul":
return "Consul"
case "vault":
return "Vault"
case "aws-pca":
return "Aws-Pca"
case "provider-name":
return "Provider-Name"
default:
return provider
}
}