connect: connect CA Roots in the primary datacenter should use a SigningKeyID derived from their local intermediate (#9428)
This fixes an issue where leaf certificates issued in primary datacenters using Vault as a Connect CA would be reissued very frequently (every ~20 seconds) because the logic meant to detect root rotation was errantly triggering. The hash of the rootCA was being compared against a hash of the intermediateCA and always failing. This doesn't apply to the Consul built-in CA provider because there is no intermediate in use in the primary DC. This is reminiscent of #6513
This commit is contained in:
parent
8b91cae80f
commit
91d9544803
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
connect: connect CA Roots in the primary datacenter should use a SigningKeyID derived from their local intermediate
|
||||
```
|
|
@ -359,10 +359,14 @@ ifeq ("$(CIRCLECI)","true")
|
|||
gotestsum --format=short-verbose --junitfile "$(TEST_RESULTS_DIR)/gotestsum-report.xml" -- -cover -coverprofile=coverage.txt ./agent/connect/ca
|
||||
# Run leader tests that require Vault
|
||||
gotestsum --format=short-verbose --junitfile "$(TEST_RESULTS_DIR)/gotestsum-report-leader.xml" -- -cover -coverprofile=coverage-leader.txt -run TestLeader_Vault_ ./agent/consul
|
||||
# Run agent tests that require Vault
|
||||
gotestsum --format=short-verbose --junitfile "$(TEST_RESULTS_DIR)/gotestsum-report-agent.xml" -- -cover -coverprofile=coverage-agent.txt -run '.*_Vault_' ./agent
|
||||
else
|
||||
# Run locally
|
||||
@echo "Running /agent/connect/ca tests in verbose mode"
|
||||
@go test -v ./agent/connect/ca
|
||||
@go test -v ./agent/consul -run 'TestLeader_Vault_'
|
||||
@go test -v ./agent -run '.*_Vault_'
|
||||
endif
|
||||
|
||||
proto: $(PROTOGOFILES) $(PROTOGOBINFILES)
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/config"
|
||||
"github.com/hashicorp/consul/agent/connect"
|
||||
"github.com/hashicorp/consul/agent/connect/ca"
|
||||
"github.com/hashicorp/consul/agent/consul"
|
||||
"github.com/hashicorp/consul/agent/debug"
|
||||
"github.com/hashicorp/consul/agent/local"
|
||||
|
@ -5788,6 +5789,131 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestAgentConnectCALeafCert_Vault_doesNotChurnLeafCertsAtIdle(t *testing.T) {
|
||||
ca.SkipIfVaultNotPresent(t)
|
||||
|
||||
if testing.Short() {
|
||||
t.Skip("too slow for testing.Short")
|
||||
}
|
||||
|
||||
t.Parallel()
|
||||
|
||||
testVault := ca.NewTestVaultServer(t)
|
||||
defer testVault.Stop()
|
||||
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
a := StartTestAgent(t, TestAgent{Overrides: fmt.Sprintf(`
|
||||
connect {
|
||||
test_ca_leaf_root_change_spread = "1ns"
|
||||
ca_provider = "vault"
|
||||
ca_config {
|
||||
address = %[1]q
|
||||
token = %[2]q
|
||||
root_pki_path = "pki-root/"
|
||||
intermediate_pki_path = "pki-intermediate/"
|
||||
}
|
||||
}
|
||||
`, testVault.Addr, testVault.RootToken)})
|
||||
defer a.Shutdown()
|
||||
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
|
||||
testrpc.WaitForActiveCARoot(t, a.RPC, "dc1", nil)
|
||||
|
||||
var ca1 *structs.CARoot
|
||||
{
|
||||
args := &structs.DCSpecificRequest{Datacenter: "dc1"}
|
||||
var reply structs.IndexedCARoots
|
||||
require.NoError(a.RPC("ConnectCA.Roots", args, &reply))
|
||||
for _, r := range reply.Roots {
|
||||
if r.ID == reply.ActiveRootID {
|
||||
ca1 = r
|
||||
break
|
||||
}
|
||||
}
|
||||
require.NotNil(ca1)
|
||||
}
|
||||
|
||||
{
|
||||
// Register a local service
|
||||
args := &structs.ServiceDefinition{
|
||||
ID: "foo",
|
||||
Name: "test",
|
||||
Address: "127.0.0.1",
|
||||
Port: 8000,
|
||||
Check: structs.CheckType{
|
||||
TTL: 15 * time.Second,
|
||||
},
|
||||
}
|
||||
req, _ := http.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args))
|
||||
resp := httptest.NewRecorder()
|
||||
_, err := a.srv.AgentRegisterService(resp, req)
|
||||
require.NoError(err)
|
||||
if !assert.Equal(200, resp.Code) {
|
||||
t.Log("Body: ", resp.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// List
|
||||
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
|
||||
require.NoError(err)
|
||||
require.Equal("MISS", resp.Header().Get("X-Cache"))
|
||||
|
||||
// Get the issued cert
|
||||
issued, ok := obj.(*structs.IssuedCert)
|
||||
assert.True(ok)
|
||||
|
||||
// Verify that the cert is signed by the CA
|
||||
requireLeafValidUnderCA(t, issued, ca1)
|
||||
|
||||
// Verify blocking index
|
||||
assert.True(issued.ModifyIndex > 0)
|
||||
assert.Equal(fmt.Sprintf("%d", issued.ModifyIndex),
|
||||
resp.Header().Get("X-Consul-Index"))
|
||||
|
||||
// Test caching
|
||||
{
|
||||
// Fetch it again
|
||||
resp := httptest.NewRecorder()
|
||||
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
|
||||
require.NoError(err)
|
||||
require.Equal(obj, obj2)
|
||||
|
||||
// Should cache hit this time and not make request
|
||||
require.Equal("HIT", resp.Header().Get("X-Cache"))
|
||||
}
|
||||
|
||||
// Test that we aren't churning leaves for no reason at idle.
|
||||
{
|
||||
ch := make(chan error, 1)
|
||||
go func() {
|
||||
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+strconv.Itoa(int(issued.ModifyIndex)), nil)
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
|
||||
if err != nil {
|
||||
ch <- err
|
||||
} else {
|
||||
issued2 := obj.(*structs.IssuedCert)
|
||||
if issued.CertPEM == issued2.CertPEM {
|
||||
ch <- fmt.Errorf("leaf woke up unexpectedly with same cert")
|
||||
} else {
|
||||
ch <- fmt.Errorf("leaf woke up unexpectedly with new cert")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
case err := <-ch:
|
||||
dur := time.Since(start)
|
||||
t.Fatalf("unexpected return from blocking query; leaf churned during idle period, took %s: %v", dur, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("too slow for testing.Short")
|
||||
|
|
|
@ -416,7 +416,7 @@ func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfi
|
|||
if err != nil {
|
||||
return fmt.Errorf("error generating intermediate cert: %v", err)
|
||||
}
|
||||
_, err = connect.ParseCert(interPEM)
|
||||
intermediateCert, err := connect.ParseCert(interPEM)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting intermediate cert: %v", err)
|
||||
}
|
||||
|
@ -439,6 +439,13 @@ func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfi
|
|||
}
|
||||
}
|
||||
|
||||
// Versions prior to 1.9.3, 1.8.8, and 1.7.12 incorrectly used the primary
|
||||
// rootCA's subjectKeyID here instead of the intermediate. For
|
||||
// provider=consul this didn't matter since there are no intermediates in
|
||||
// the primaryDC, but for vault it does matter.
|
||||
expectedSigningKeyID := connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
|
||||
needsSigningKeyUpdate := (rootCA.SigningKeyID != expectedSigningKeyID)
|
||||
|
||||
// Check if the CA root is already initialized and exit if it is,
|
||||
// adding on any existing intermediate certs since they aren't directly
|
||||
// tied to the provider.
|
||||
|
@ -449,7 +456,10 @@ func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfi
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if activeRoot != nil {
|
||||
if activeRoot != nil && needsSigningKeyUpdate {
|
||||
c.logger.Info("Correcting stored SigningKeyID value", "previous", rootCA.SigningKeyID, "updated", expectedSigningKeyID)
|
||||
|
||||
} else if activeRoot != nil && !needsSigningKeyUpdate {
|
||||
// This state shouldn't be possible to get into because we update the root and
|
||||
// CA config in the same FSM operation.
|
||||
if activeRoot.ID != rootCA.ID {
|
||||
|
@ -462,6 +472,10 @@ func (c *CAManager) initializeRootCA(provider ca.Provider, conf *structs.CAConfi
|
|||
return nil
|
||||
}
|
||||
|
||||
if needsSigningKeyUpdate {
|
||||
rootCA.SigningKeyID = expectedSigningKeyID
|
||||
}
|
||||
|
||||
// Get the highest index
|
||||
idx, _, err := state.CARoots(nil)
|
||||
if err != nil {
|
||||
|
|
|
@ -579,6 +579,111 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) {
|
|||
require.NoError(err)
|
||||
}
|
||||
|
||||
func TestLeader_Vault_PrimaryCA_FixSigningKeyID_OnRestart(t *testing.T) {
|
||||
ca.SkipIfVaultNotPresent(t)
|
||||
|
||||
if testing.Short() {
|
||||
t.Skip("too slow for testing.Short")
|
||||
}
|
||||
|
||||
t.Parallel()
|
||||
|
||||
testVault := ca.NewTestVaultServer(t)
|
||||
defer testVault.Stop()
|
||||
|
||||
dir1pre, s1pre := testServerWithConfig(t, func(c *Config) {
|
||||
c.Build = "1.6.0"
|
||||
c.PrimaryDatacenter = "dc1"
|
||||
c.CAConfig = &structs.CAConfiguration{
|
||||
Provider: "vault",
|
||||
Config: map[string]interface{}{
|
||||
"Address": testVault.Addr,
|
||||
"Token": testVault.RootToken,
|
||||
"RootPKIPath": "pki-root/",
|
||||
"IntermediatePKIPath": "pki-intermediate/",
|
||||
},
|
||||
}
|
||||
})
|
||||
defer os.RemoveAll(dir1pre)
|
||||
defer s1pre.Shutdown()
|
||||
|
||||
testrpc.WaitForLeader(t, s1pre.RPC, "dc1")
|
||||
|
||||
// Restore the pre-1.9.3/1.8.8/1.7.12 behavior of the SigningKeyID not being derived
|
||||
// from the intermediates in the primary (which only matters for provider=vault).
|
||||
var primaryRootSigningKeyID string
|
||||
{
|
||||
state := s1pre.fsm.State()
|
||||
|
||||
// Get the highest index
|
||||
idx, activePrimaryRoot, err := state.CARootActive(nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, activePrimaryRoot)
|
||||
|
||||
rootCert, err := connect.ParseCert(activePrimaryRoot.RootCert)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Force this to be derived just from the root, not the intermediate.
|
||||
primaryRootSigningKeyID = connect.EncodeSigningKeyID(rootCert.SubjectKeyId)
|
||||
activePrimaryRoot.SigningKeyID = primaryRootSigningKeyID
|
||||
|
||||
// Store the root cert in raft
|
||||
resp, err := s1pre.raftApply(structs.ConnectCARequestType, &structs.CARequest{
|
||||
Op: structs.CAOpSetRoots,
|
||||
Index: idx,
|
||||
Roots: []*structs.CARoot{activePrimaryRoot},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
if respErr, ok := resp.(error); ok {
|
||||
t.Fatalf("respErr: %v", respErr)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown s1pre and restart it to trigger the secondary CA init to correct
|
||||
// the SigningKeyID.
|
||||
s1pre.Shutdown()
|
||||
|
||||
dir1, s1 := testServerWithConfig(t, func(c *Config) {
|
||||
c.DataDir = s1pre.config.DataDir
|
||||
c.Datacenter = "dc1"
|
||||
c.PrimaryDatacenter = "dc1"
|
||||
c.NodeName = s1pre.config.NodeName
|
||||
c.NodeID = s1pre.config.NodeID
|
||||
})
|
||||
defer os.RemoveAll(dir1)
|
||||
defer s1.Shutdown()
|
||||
|
||||
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
||||
|
||||
// Retry since it will take some time to init the primary CA fully and there
|
||||
// isn't a super clean way to watch specifically until it's done than polling
|
||||
// the CA provider anyway.
|
||||
retry.Run(t, func(r *retry.R) {
|
||||
// verify that the root is now corrected
|
||||
provider, activeRoot := getCAProviderWithLock(s1)
|
||||
require.NotNil(r, provider)
|
||||
require.NotNil(r, activeRoot)
|
||||
|
||||
activeIntermediate, err := provider.ActiveIntermediate()
|
||||
require.NoError(r, err)
|
||||
|
||||
intermediateCert, err := connect.ParseCert(activeIntermediate)
|
||||
require.NoError(r, err)
|
||||
|
||||
// Force this to be derived just from the root, not the intermediate.
|
||||
expect := connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
|
||||
|
||||
// The in-memory representation was saw the correction via a setCAProvider call.
|
||||
require.Equal(r, expect, activeRoot.SigningKeyID)
|
||||
|
||||
// The state store saw the correction, too.
|
||||
_, activePrimaryRoot, err := s1.fsm.State().CARootActive(nil)
|
||||
require.NoError(r, err)
|
||||
require.NotNil(r, activePrimaryRoot)
|
||||
require.Equal(r, expect, activePrimaryRoot.SigningKeyID)
|
||||
})
|
||||
}
|
||||
|
||||
func TestLeader_SecondaryCA_FixSigningKeyID_via_IntermediateRefresh(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("too slow for testing.Short")
|
||||
|
|
Loading…
Reference in New Issue