diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index ee9faca98..1de613085 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -14,6 +14,7 @@ import ( "os" "reflect" "sort" + "strconv" "strings" "testing" "time" @@ -4868,15 +4869,209 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) { } } -func requireLeafValidUnderCA(t *testing.T, issued *structs.IssuedCert, - ca *structs.CARoot) { +func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + require := require.New(t) + + a1 := NewTestAgent(t, t.Name()+"-dc1", ` + datacenter = "dc1" + primary_datacenter = "dc1" + `) + defer a1.Shutdown() + testrpc.WaitForTestAgent(t, a1.RPC, "dc1") + + a2 := NewTestAgent(t, t.Name()+"-dc2", ` + datacenter = "dc2" + primary_datacenter = "dc1" + `) + defer a2.Shutdown() + testrpc.WaitForTestAgent(t, a2.RPC, "dc2") + + // Wait for the WAN join. + addr := fmt.Sprintf("127.0.0.1:%d", a1.Config.SerfPortWAN) + _, err := a2.JoinWAN([]string{addr}) + require.NoError(err) + + testrpc.WaitForLeader(t, a1.RPC, "dc1") + testrpc.WaitForLeader(t, a2.RPC, "dc2") + retry.Run(t, func(r *retry.R) { + if got, want := len(a1.WANMembers()), 2; got < want { + r.Fatalf("got %d WAN members want at least %d", got, want) + } + }) + + // CA already setup by default by NewTestAgent but force a new one so we can + // verify it was signed easily. + dc1_ca1 := connect.TestCAConfigSet(t, a1, nil) + + // Wait until root is updated in both dcs. + waitForActiveCARoot(t, a1.srv, dc1_ca1) + waitForActiveCARoot(t, a2.srv, dc1_ca1) + + { + // Register a local service in the SECONDARY + args := &structs.ServiceDefinition{ + ID: "foo", + Name: "test", + Address: "127.0.0.1", + Port: 8000, + Check: structs.CheckType{ + TTL: 15 * time.Second, + }, + } + req, _ := http.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args)) + resp := httptest.NewRecorder() + _, err := a2.srv.AgentRegisterService(resp, req) + require.NoError(err) + if !assert.Equal(200, resp.Code) { + t.Log("Body: ", resp.Body.String()) + } + } + + // List + req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) + resp := httptest.NewRecorder() + obj, err := a2.srv.AgentConnectCALeafCert(resp, req) + require.NoError(err) + require.Equal("MISS", resp.Header().Get("X-Cache")) + + // Get the issued cert + issued, ok := obj.(*structs.IssuedCert) + assert.True(ok) + + // Verify that the cert is signed by the CA + requireLeafValidUnderCA(t, issued, dc1_ca1) + + // Verify blocking index + assert.True(issued.ModifyIndex > 0) + assert.Equal(fmt.Sprintf("%d", issued.ModifyIndex), + resp.Header().Get("X-Consul-Index")) + + // Test caching + { + // Fetch it again + resp := httptest.NewRecorder() + obj2, err := a2.srv.AgentConnectCALeafCert(resp, req) + require.NoError(err) + require.Equal(obj, obj2) + + // Should cache hit this time and not make request + require.Equal("HIT", resp.Header().Get("X-Cache")) + } + + // Test that we aren't churning leaves for no reason at idle. + { + ch := make(chan error, 1) + go func() { + req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+strconv.Itoa(int(issued.ModifyIndex)), nil) + resp := httptest.NewRecorder() + obj, err := a2.srv.AgentConnectCALeafCert(resp, req) + if err != nil { + ch <- err + } else { + issued2 := obj.(*structs.IssuedCert) + if issued.CertPEM == issued2.CertPEM { + ch <- fmt.Errorf("leaf woke up unexpectedly with same cert") + } else { + ch <- fmt.Errorf("leaf woke up unexpectedly with new cert") + } + } + }() + + start := time.Now() + + // Before applying the fix from PR-6513 this would reliably wake up + // after ~20ms with a new cert. Since this test is necessarily a bit + // timing dependent we'll chill out for 5 seconds which should be enough + // time to disprove the original bug. + select { + case <-time.After(5 * time.Second): + case err := <-ch: + dur := time.Since(start) + t.Fatalf("unexpected return from blocking query; leaf churned during idle period, took %s: %v", dur, err) + } + } + + // Set a new CA + dc1_ca2 := connect.TestCAConfigSet(t, a2, nil) + + // Wait until root is updated in both dcs. + waitForActiveCARoot(t, a1.srv, dc1_ca2) + waitForActiveCARoot(t, a2.srv, dc1_ca2) + + // Test that caching is updated in the background + retry.Run(t, func(r *retry.R) { + resp := httptest.NewRecorder() + // Try and sign again (note no index/wait arg since cache should update in + // background even if we aren't actively blocking) + obj, err := a2.srv.AgentConnectCALeafCert(resp, req) + r.Check(err) + + issued2 := obj.(*structs.IssuedCert) + if issued.CertPEM == issued2.CertPEM { + r.Fatalf("leaf has not updated") + } + + // Got a new leaf. Sanity check it's a whole new key as well as different + // cert. + if issued.PrivateKeyPEM == issued2.PrivateKeyPEM { + r.Fatalf("new leaf has same private key as before") + } + + // Verify that the cert is signed by the new CA + requireLeafValidUnderCA(t, issued2, dc1_ca2) + + // Should be a cache hit! The data should've updated in the cache + // in the background so this should've been fetched directly from + // the cache. + if resp.Header().Get("X-Cache") != "HIT" { + r.Fatalf("should be a cache hit") + } + }) +} + +func waitForActiveCARoot(t *testing.T, srv *HTTPServer, expect *structs.CARoot) { + retry.Run(t, func(r *retry.R) { + req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/roots", nil) + resp := httptest.NewRecorder() + obj, err := srv.AgentConnectCARoots(resp, req) + if err != nil { + r.Fatalf("err: %v", err) + } + + roots, ok := obj.(structs.IndexedCARoots) + if !ok { + r.Fatalf("response is wrong type %T", obj) + } + + var root *structs.CARoot + for _, r := range roots.Roots { + if r.ID == roots.ActiveRootID { + root = r + break + } + } + if root == nil { + r.Fatal("no active root") + } + if root.ID != expect.ID { + r.Fatalf("current active root is %s; waiting for %s", root.ID, expect.ID) + } + }) +} + +func requireLeafValidUnderCA(t *testing.T, issued *structs.IssuedCert, ca *structs.CARoot) { + leaf, intermediates, err := connect.ParseLeafCerts(issued.CertPEM) + require.NoError(t, err) roots := x509.NewCertPool() require.True(t, roots.AppendCertsFromPEM([]byte(ca.RootCert))) - leaf, err := connect.ParseCert(issued.CertPEM) - require.NoError(t, err) + _, err = leaf.Verify(x509.VerifyOptions{ - Roots: roots, + Roots: roots, + Intermediates: intermediates, }) require.NoError(t, err) diff --git a/agent/cache-types/connect_ca_leaf.go b/agent/cache-types/connect_ca_leaf.go index f9983dd29..137a3c400 100644 --- a/agent/cache-types/connect_ca_leaf.go +++ b/agent/cache-types/connect_ca_leaf.go @@ -96,9 +96,9 @@ type ConnectCALeaf struct { // since all times we get from our wall clock should point to the same Location // anyway. type fetchState struct { - // authorityKeyID is the key ID of the CA root that signed the current cert. - // This is just to save parsing the whole cert everytime we have to check if - // the root changed. + // authorityKeyId is the ID of the CA key (whether root or intermediate) that signed + // the current cert. This is just to save parsing the whole cert everytime + // we have to check if the root changed. authorityKeyID string // forceExpireAfter is used to coordinate renewing certs after a CA rotation @@ -362,7 +362,7 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache expiresAt = state.forceExpireAfter } - if expiresAt == now || expiresAt.Before(now) { + if expiresAt.Equal(now) || expiresAt.Before(now) { // Already expired, just make a new one right away return c.generateNewLeaf(reqReal, lastResultWithNewState()) } diff --git a/agent/connect/parsing.go b/agent/connect/parsing.go index 9e700e535..26d9e2ea2 100644 --- a/agent/connect/parsing.go +++ b/agent/connect/parsing.go @@ -29,6 +29,67 @@ func ParseCert(pemValue string) (*x509.Certificate, error) { return x509.ParseCertificate(block.Bytes) } +// ParseLeafCerts parses all of the x509 certificates from a PEM-encoded value +// under the assumption that the first cert is a leaf (non-CA) cert and the +// rest are intermediate CA certs. +// +// If no certificates are found this returns an error. +func ParseLeafCerts(pemValue string) (*x509.Certificate, *x509.CertPool, error) { + certs, err := parseCerts(pemValue) + if err != nil { + return nil, nil, err + } + + leaf := certs[0] + if leaf.IsCA { + return nil, nil, fmt.Errorf("first PEM-block should be a leaf cert") + } + + intermediates := x509.NewCertPool() + for _, cert := range certs[1:] { + if !cert.IsCA { + return nil, nil, fmt.Errorf("found an unexpected leaf cert after the first PEM-block") + } + intermediates.AddCert(cert) + } + + return leaf, intermediates, nil +} + +// ParseCerts parses the all x509 certificates from a PEM-encoded value. +// The first returned cert is a leaf cert and any other ones are intermediates. +// +// If no certificates are found this returns an error. +func parseCerts(pemValue string) ([]*x509.Certificate, error) { + var out []*x509.Certificate + + rest := []byte(pemValue) + for { + // The _ result below is not an error but the remaining PEM bytes. + block, remaining := pem.Decode(rest) + if block == nil { + break + } + rest = remaining + + if block.Type != "CERTIFICATE" { + return nil, fmt.Errorf("PEM-block should be CERTIFICATE type") + } + + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return nil, err + } + out = append(out, cert) + } + + if len(out) == 0 { + return nil, fmt.Errorf("no PEM-encoded data found") + } + + return out, nil +} + // CalculateCertFingerprint parses the x509 certificate from a PEM-encoded value // and calculates the SHA-1 fingerprint. func CalculateCertFingerprint(pemValue string) (string, error) { diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index 02ad21556..3f463b48b 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -290,7 +290,10 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index return err } - var storedRootID string + var ( + storedRootID string + expectedSigningKeyID string + ) if activeIntermediate != "" { storedRoot, err := provider.ActiveRoot() if err != nil { @@ -301,6 +304,12 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index if err != nil { return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, roots) } + + intermediateCert, err := connect.ParseCert(activeIntermediate) + if err != nil { + return fmt.Errorf("error parsing active intermediate cert: %v", err) + } + expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) } var newActiveRoot *structs.CARoot @@ -314,10 +323,21 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index return fmt.Errorf("primary datacenter does not have an active root CA for Connect") } - newIntermediate := false // Get a signed intermediate from the primary DC if the provider // hasn't been initialized yet or if the primary's root has changed. + needsNewIntermediate := false if activeIntermediate == "" || storedRootID != roots.ActiveRootID { + needsNewIntermediate = true + } + + // Also we take this opportunity to correct an incorrectly persisted SigningKeyID + // in secondary datacenters (see PR-6513). + if expectedSigningKeyID != "" && newActiveRoot.SigningKeyID != expectedSigningKeyID { + needsNewIntermediate = true + } + + newIntermediate := false + if needsNewIntermediate { csr, err := provider.GenerateIntermediateCSR() if err != nil { return err @@ -334,8 +354,14 @@ func (s *Server) initializeSecondaryCA(provider ca.Provider, roots structs.Index return fmt.Errorf("Failed to set the intermediate certificate with the CA provider: %v", err) } + intermediateCert, err := connect.ParseCert(intermediatePEM) + if err != nil { + return fmt.Errorf("error parsing intermediate cert: %v", err) + } + // Append the new intermediate to our local active root entry. newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) + newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) newIntermediate = true s.logger.Printf("[INFO] connect: received new intermediate certificate from primary datacenter") diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index e041b1468..06c7fb4cc 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -145,13 +145,18 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { require.NoError(err) require.NotEmpty(oldIntermediatePEM) - // Store the current root - rootReq := &structs.DCSpecificRequest{ - Datacenter: "dc1", + // Capture the current root + var originalRoot *structs.CARoot + { + rootList, activeRoot, err := getTestRoots(s1, "dc1") + require.NoError(err) + require.Len(rootList.Roots, 1) + originalRoot = activeRoot } - var rootList structs.IndexedCARoots - require.NoError(s1.RPC("ConnectCA.Roots", rootReq, &rootList)) - require.Len(rootList.Roots, 1) + + // Wait for current state to be reflected in both datacenters. + waitForActiveCARoot(t, s1, "dc1", originalRoot) + waitForActiveCARoot(t, s2, "dc2", originalRoot) // Update the provider config to use a new private key, which should // cause a rotation. @@ -175,6 +180,14 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { require.NoError(s1.RPC("ConnectCA.ConfigurationSet", args, &reply)) } + var updatedRoot *structs.CARoot + { + rootList, activeRoot, err := getTestRoots(s1, "dc1") + require.NoError(err) + require.Len(rootList.Roots, 2) + updatedRoot = activeRoot + } + // Wait for dc2's intermediate to be refreshed. var intermediatePEM string retry.Run(t, func(r *retry.R) { @@ -186,6 +199,9 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { }) require.NoError(err) + waitForActiveCARoot(t, s1, "dc1", updatedRoot) + waitForActiveCARoot(t, s2, "dc2", updatedRoot) + // Verify the root lists have been rotated in each DC's state store. state1 := s1.fsm.State() _, primaryRoot, err := state1.CARootActive(nil) @@ -243,6 +259,99 @@ func TestLeader_SecondaryCA_IntermediateRefresh(t *testing.T) { require.NoError(err) } +func TestLeader_SecondaryCA_FixSigningKeyID_via_IntermediateRefresh(t *testing.T) { + t.Parallel() + + require := require.New(t) + + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Build = "1.6.0" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // dc2 as a secondary DC + dir2pre, s2pre := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc1" + c.Build = "1.6.0" + }) + defer os.RemoveAll(dir2pre) + defer s2pre.Shutdown() + + // Create the WAN link + joinWAN(t, s2pre, s1) + testrpc.WaitForLeader(t, s2pre.RPC, "dc2") + + // Restore the pre-1.6.1 behavior of the SigningKeyID not being derived + // from the intermediates. + { + state := s2pre.fsm.State() + + // Get the highest index + idx, roots, err := state.CARoots(nil) + require.NoError(err) + + var activeRoot *structs.CARoot + for _, root := range roots { + if root.Active { + activeRoot = root + } + } + require.NotNil(activeRoot) + + rootCert, err := connect.ParseCert(activeRoot.RootCert) + require.NoError(err) + + // Force this to be derived just from the root, not the intermediate. + activeRoot.SigningKeyID = connect.EncodeSigningKeyID(rootCert.SubjectKeyId) + + // Store the root cert in raft + resp, err := s2pre.raftApply(structs.ConnectCARequestType, &structs.CARequest{ + Op: structs.CAOpSetRoots, + Index: idx, + Roots: []*structs.CARoot{activeRoot}, + }) + require.NoError(err) + if respErr, ok := resp.(error); ok { + t.Fatalf("respErr: %v", respErr) + } + } + + // Shutdown s2pre and restart it to trigger the secondary CA init to correct + // the SigningKeyID. + s2pre.Shutdown() + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.DataDir = s2pre.config.DataDir + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc1" + c.NodeName = s2pre.config.NodeName + c.NodeID = s2pre.config.NodeID + }) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + testrpc.WaitForLeader(t, s2.RPC, "dc2") + + { // verify that the root is now corrected + provider, activeRoot := s2.getCAProvider() + require.NotNil(provider) + require.NotNil(activeRoot) + + activeIntermediate, err := provider.ActiveIntermediate() + require.NoError(err) + + intermediateCert, err := connect.ParseCert(activeIntermediate) + require.NoError(err) + + // Force this to be derived just from the root, not the intermediate. + expect := connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + require.Equal(expect, activeRoot.SigningKeyID) + } +} + func TestLeader_SecondaryCA_TransitionFromPrimary(t *testing.T) { t.Parallel() @@ -461,6 +570,52 @@ func TestLeader_SecondaryCA_UpgradeBeforePrimary(t *testing.T) { require.NoError(t, err) } +func waitForActiveCARoot(t *testing.T, s *Server, datacenter string, expect *structs.CARoot) { + retry.Run(t, func(r *retry.R) { + args := &structs.DCSpecificRequest{ + Datacenter: datacenter, + } + var reply structs.IndexedCARoots + if err := s.RPC("ConnectCA.Roots", args, &reply); err != nil { + r.Fatalf("err: %v", err) + } + + var root *structs.CARoot + for _, r := range reply.Roots { + if r.ID == reply.ActiveRootID { + root = r + break + } + } + if root == nil { + r.Fatal("no active root") + } + if root.ID != expect.ID { + r.Fatalf("current active root is %s; waiting for %s", root.ID, expect.ID) + } + }) +} + +func getTestRoots(s *Server, datacenter string) (*structs.IndexedCARoots, *structs.CARoot, error) { + rootReq := &structs.DCSpecificRequest{ + Datacenter: datacenter, + } + var rootList structs.IndexedCARoots + if err := s.RPC("ConnectCA.Roots", rootReq, &rootList); err != nil { + return nil, nil, err + } + + var active *structs.CARoot + for _, root := range rootList.Roots { + if root.Active { + active = root + break + } + } + + return &rootList, active, nil +} + func TestLeader_ReplicateIntentions(t *testing.T) { t.Parallel() diff --git a/agent/structs/connect_ca.go b/agent/structs/connect_ca.go index 2e47162bd..47e4947e1 100644 --- a/agent/structs/connect_ca.go +++ b/agent/structs/connect_ca.go @@ -60,8 +60,8 @@ type CARoot struct { SerialNumber uint64 // SigningKeyID is the ID of the public key that corresponds to the private - // key used to sign the certificate. Is is the HexString format of the raw - // AuthorityKeyID bytes. + // key used to sign leaf certificates. Is is the HexString format of the + // raw AuthorityKeyID bytes. SigningKeyID string // ExternalTrustDomain is the trust domain this root was generated under. It