diff --git a/.changelog/11514.txt b/.changelog/11514.txt new file mode 100644 index 000000000..d869f46ab --- /dev/null +++ b/.changelog/11514.txt @@ -0,0 +1,6 @@ +```release-note:improvement +connect/ca: Return an error when querying roots from uninitialized CA. +``` +```release-note:bug +connect/ca: Allow secondary initialization to resume after being deferred due to unreachable or incompatible primary DC servers. +``` \ No newline at end of file diff --git a/agent/consul/leader_connect_ca.go b/agent/consul/leader_connect_ca.go index 03b2acbc9..413f0d20e 100644 --- a/agent/consul/leader_connect_ca.go +++ b/agent/consul/leader_connect_ca.go @@ -44,7 +44,7 @@ type caServerDelegate interface { forwardDC(method, dc string, args interface{}, reply interface{}) error generateCASignRequest(csr string) *structs.CASignRequest - checkServersProvider + ServersSupportMultiDCConnectCA() error } // CAManager is a wrapper around CA operations such as updating roots, an intermediate @@ -127,6 +127,17 @@ func (c *caDelegateWithState) generateCASignRequest(csr string) *structs.CASignR } } +func (c *caDelegateWithState) ServersSupportMultiDCConnectCA() error { + versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.Server, c.Server.config.PrimaryDatacenter, minMultiDCConnectVersion) + if !primaryFound { + return fmt.Errorf("primary datacenter is unreachable") + } + if !versionOk { + return fmt.Errorf("all servers in the primary datacenter are not at the minimum version %v", minMultiDCConnectVersion) + } + return nil +} + func NewCAManager(delegate caServerDelegate, leaderRoutineManager *routine.Manager, logger hclog.Logger, config *Config) *CAManager { return &CAManager{ delegate: delegate, @@ -202,7 +213,8 @@ func (c *CAManager) initializeCAConfig() (*structs.CAConfiguration, error) { } if config == nil { config = c.serverConf.CAConfig - if config.ClusterID == "" { + + if c.serverConf.Datacenter == c.serverConf.PrimaryDatacenter && config.ClusterID == "" { id, err := uuid.GenerateUUID() if err != nil { return nil, err @@ -408,18 +420,8 @@ func (c *CAManager) InitializeCA() (reterr error) { } func (c *CAManager) secondaryInitialize(provider ca.Provider, conf *structs.CAConfiguration) error { - // If this isn't the primary DC, run the secondary DC routine if the primary has already been upgraded to at least 1.6.0 - versionOk, foundPrimary := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) - if !foundPrimary { - c.logger.Warn("primary datacenter is configured but unreachable - deferring initialization of the secondary datacenter CA") - // return nil because we will initialize the secondary CA later - return nil - } else if !versionOk { - // return nil because we will initialize the secondary CA later - c.logger.Warn("servers in the primary datacenter are not at least at the minimum version - deferring initialization of the secondary datacenter CA", - "min_version", minMultiDCConnectVersion.String(), - ) - return nil + if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil { + return fmt.Errorf("initialization will be deferred: %w", err) } // Get the root CA to see if we need to refresh our intermediate. @@ -1266,15 +1268,11 @@ func (c *CAManager) secondaryUpdateRoots(roots structs.IndexedCARoots) error { return nil } if !c.secondaryIsCAConfigured() { - versionOk, primaryFound := ServersInDCMeetMinimumVersion(c.delegate, c.serverConf.PrimaryDatacenter, minMultiDCConnectVersion) - if !primaryFound { - return fmt.Errorf("Primary datacenter is unreachable - deferring secondary CA initialization") + if err := c.delegate.ServersSupportMultiDCConnectCA(); err != nil { + return fmt.Errorf("failed to initialize while updating primary roots: %w", err) } - - if versionOk { - if err := c.secondaryInitializeProvider(provider, roots); err != nil { - return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) - } + if err := c.secondaryInitializeProvider(provider, roots); err != nil { + return fmt.Errorf("Failed to initialize secondary CA provider: %v", err) } } diff --git a/agent/consul/leader_connect_ca_test.go b/agent/consul/leader_connect_ca_test.go index c69373e51..9ea55fd2d 100644 --- a/agent/consul/leader_connect_ca_test.go +++ b/agent/consul/leader_connect_ca_test.go @@ -14,15 +14,12 @@ import ( "testing" "time" - "github.com/hashicorp/go-version" - "github.com/hashicorp/serf/serf" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/connect" ca "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/consul/state" - "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/sdk/testutil" @@ -60,12 +57,8 @@ func (m *mockCAServerDelegate) IsLeader() bool { return true } -func (m *mockCAServerDelegate) CheckServers(datacenter string, fn func(*metadata.Server) bool) { - ver, _ := version.NewVersion("1.6.0") - fn(&metadata.Server{ - Status: serf.StatusAlive, - Build: *ver, - }) +func (m *mockCAServerDelegate) ServersSupportMultiDCConnectCA() error { + return nil } func (m *mockCAServerDelegate) ApplyCALeafRequest() (uint64, error) { diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 0151d068c..f4a65bf9e 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -1094,7 +1094,6 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) { // Wait for the secondary transition to happen and then verify the secondary DC // has both roots present. - secondaryProvider, _ := getCAProviderWithLock(s2) retry.Run(t, func(r *retry.R) { state1 := s1.fsm.State() _, roots1, err := state1.CARoots(nil) @@ -1110,15 +1109,18 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) { require.Equal(r, roots1[0].ID, roots2[0].ID) require.Equal(r, roots1[0].RootCert, roots2[0].RootCert) + secondaryProvider, _ := getCAProviderWithLock(s2) inter, err := secondaryProvider.ActiveIntermediate() require.NoError(r, err) require.NotEmpty(r, inter, "should have valid intermediate") }) - _, caRoot := getCAProviderWithLock(s1) + secondaryProvider, _ := getCAProviderWithLock(s2) intermediatePEM, err := secondaryProvider.ActiveIntermediate() require.NoError(t, err) + _, caRoot := getCAProviderWithLock(s1) + // Have dc2 sign a leaf cert and make sure the chain is correct. spiffeService := &connect.SpiffeIDService{ Host: "node1", diff --git a/agent/consul/server_connect.go b/agent/consul/server_connect.go index 09453a5ee..f42443799 100644 --- a/agent/consul/server_connect.go +++ b/agent/consul/server_connect.go @@ -16,19 +16,23 @@ func (s *Server) getCARoots(ws memdb.WatchSet, state *state.Store) (*structs.Ind if err != nil { return nil, err } + if config == nil { + return nil, fmt.Errorf("CA has not finished initializing") + } indexedRoots := &structs.IndexedCARoots{} - if config != nil { - // Build TrustDomain based on the ClusterID stored. - signingID := connect.SpiffeIDSigningForCluster(config) - if signingID == nil { - // If CA is bootstrapped at all then this should never happen but be - // defensive. - return nil, fmt.Errorf("no cluster trust domain setup") - } + // Build TrustDomain based on the ClusterID stored. + signingID := connect.SpiffeIDSigningForCluster(config) + if signingID == nil { + // If CA is bootstrapped at all then this should never happen but be + // defensive. + return nil, fmt.Errorf("no cluster trust domain setup") + } - indexedRoots.TrustDomain = signingID.Host() + indexedRoots.TrustDomain = signingID.Host() + if indexedRoots.TrustDomain == "" { + return nil, fmt.Errorf("CA has not finished initializing") } indexedRoots.Index, indexedRoots.Roots = index, roots diff --git a/agent/consul/state/connect_ca.go b/agent/consul/state/connect_ca.go index ab54ae69a..0b35d0393 100644 --- a/agent/consul/state/connect_ca.go +++ b/agent/consul/state/connect_ca.go @@ -180,8 +180,6 @@ func (s *Store) caSetConfigTxn(idx uint64, tx WriteTxn, config *structs.CAConfig if prev != nil { existing := prev.(*structs.CAConfiguration) config.CreateIndex = existing.CreateIndex - // Allow the ClusterID to change if it's provided by an internal operation, such - // as a primary datacenter being switched to secondary mode. if config.ClusterID == "" { config.ClusterID = existing.ClusterID }