add MustRevalidate flag to connect_ca_leaf cache type; always use on non-blocking queries (#11693)

* always use MustRevalidate on non-blocking queries for connect ca leaf

Signed-off-by: FFMMM <FFMMM@users.noreply.github.com>

* Update agent/agent_endpoint_test.go

Co-authored-by: Daniel Nephin <dnephin@hashicorp.com>

* pr feedback

Signed-off-by: FFMMM <FFMMM@users.noreply.github.com>

Co-authored-by: Daniel Nephin <dnephin@hashicorp.com>
This commit is contained in:
FFMMM 2021-12-02 11:32:15 -08:00 committed by GitHub
parent eff3dc09b6
commit 38c457b486
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 105 additions and 45 deletions

3
.changelog/11693.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
ca: fixes a bug that caused non blocking leaf cert queries to return the same cached response regardless of ca rotation or leaf cert expiry
```

View File

@ -1494,7 +1494,9 @@ func (s *HTTPHandlers) AgentConnectCARoots(resp http.ResponseWriter, req *http.R
}
// AgentConnectCALeafCert returns the certificate bundle for a service
// instance. This supports blocking queries to update the returned bundle.
// instance. This endpoint ignores all "Cache-Control" attributes.
// This supports blocking queries to update the returned bundle.
// Non-blocking queries will always verify that the cache entry is still valid.
func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Get the service name. Note that this is the name of the service,
// not the ID of the service instance.
@ -1518,6 +1520,14 @@ func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *htt
args.MaxQueryTime = qOpts.MaxQueryTime
args.Token = qOpts.Token
// TODO(ffmmmm): maybe set MustRevalidate in ConnectCALeafRequest (as part of CacheInfo())
// We don't want non-blocking queries to return expired leaf certs
// or leaf certs not valid under the current CA. So always revalidate
// the leaf cert on non-blocking queries (ie when MinQueryIndex == 0)
if args.MinQueryIndex == 0 {
args.MustRevalidate = true
}
if !s.validateRequestPartition(resp, &args.EnterpriseMeta) {
return nil, nil
}

View File

@ -5797,16 +5797,13 @@ func TestAgentConnectCALeafCert_good(t *testing.T) {
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}
// Set a new CA
ca2 := connect.TestCAConfigSet(t, a, nil)
// Issue a blocking query to ensure that the cert gets updated appropriately
{
// Set a new CA
ca := connect.TestCAConfigSet(t, a, nil)
resp := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil)
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
@ -5816,12 +5813,59 @@ func TestAgentConnectCALeafCert_good(t *testing.T) {
require.NotEqual(issued.PrivateKeyPEM, issued2.PrivateKeyPEM)
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca)
requireLeafValidUnderCA(t, issued2, ca2)
// Should not be a cache hit! The data was updated in response to the blocking
// query being made.
require.Equal("MISS", resp.Header().Get("X-Cache"))
}
t.Run("test non-blocking queries update leaf cert", func(t *testing.T) {
resp := httptest.NewRecorder()
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
// Get the issued cert
issued, ok := obj.(*structs.IssuedCert)
assert.True(ok)
// Verify that the cert is signed by the CA
requireLeafValidUnderCA(t, issued, ca2)
// Issue a non blocking query to ensure that the cert gets updated appropriately
{
// Set a new CA
ca3 := connect.TestCAConfigSet(t, a, nil)
resp := httptest.NewRecorder()
req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
require.NoError(err)
obj, err = a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
issued2 := obj.(*structs.IssuedCert)
require.NotEqual(issued.CertPEM, issued2.CertPEM)
require.NotEqual(issued.PrivateKeyPEM, issued2.PrivateKeyPEM)
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca3)
// Should not be a cache hit!
require.Equal("MISS", resp.Header().Get("X-Cache"))
}
// Test caching for the leaf cert
{
for fetched := 0; fetched < 4; fetched++ {
// Fetch it again
resp := httptest.NewRecorder()
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
}
}
})
}
// Test we can request a leaf cert for a service we have permission for
@ -5894,9 +5938,6 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}
// Test Blocking - see https://github.com/hashicorp/consul/issues/4462
@ -5944,12 +5985,7 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca)
// Should be a cache hit! The data should've updated in the cache
// in the background so this should've been fetched directly from
// the cache.
if resp.Header().Get("X-Cache") != "HIT" {
r.Fatalf("should be a cache hit")
}
require.NotEqual(issued, issued2)
})
}
}
@ -6044,9 +6080,6 @@ func TestAgentConnectCALeafCert_Vault_doesNotChurnLeafCertsAtIdle(t *testing.T)
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}
// Test that we aren't churning leaves for no reason at idle.
@ -6153,7 +6186,8 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
}
// List
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
require.NoError(err)
resp := httptest.NewRecorder()
obj, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
@ -6178,9 +6212,6 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
obj2, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}
// Test that we aren't churning leaves for no reason at idle.
@ -6245,12 +6276,7 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, dc1_ca2)
// Should be a cache hit! The data should've updated in the cache
// in the background so this should've been fetched directly from
// the cache.
if resp.Header().Get("X-Cache") != "HIT" {
r.Fatalf("should be a cache hit")
}
require.NotEqual(issued, issued2)
})
}

View File

@ -380,6 +380,25 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
return c.generateNewLeaf(reqReal, lastResultWithNewState())
}
// If we called Fetch() with MustRevalidate then this call came from a non-blocking query.
// Any prior CA rotations should've already expired the cert.
// All we need to do is check whether the current CA is the one that signed the leaf. If not, generate a new leaf.
// This is not a perfect solution (as a CA rotation update can be missed) but it should take care of instances like
// see https://github.com/hashicorp/consul/issues/10871, https://github.com/hashicorp/consul/issues/9862
// This seems to me like a hack, so maybe we can revisit the caching/ fetching logic in this case
if req.CacheInfo().MustRevalidate {
roots, err := c.rootsFromCache()
if err != nil {
return lastResultWithNewState(), err
}
if activeRootHasKey(roots, state.authorityKeyID) {
return lastResultWithNewState(), nil
}
// if we reach here then the current leaf was not signed by the same CAs, just regen
return c.generateNewLeaf(reqReal, lastResultWithNewState())
}
// We are about to block and wait for a change or timeout.
// Make a chan we can be notified of changes to CA roots on. It must be
@ -401,7 +420,7 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
c.fetchStart(rootUpdateCh)
defer c.fetchDone(rootUpdateCh)
// Setup the timeout chan outside the loop so we don't keep bumping the timout
// Setup the timeout chan outside the loop so we don't keep bumping the timeout
// later if we loop around.
timeoutCh := time.After(opts.Timeout)
@ -492,7 +511,7 @@ func (c *ConnectCALeaf) rootsFromCache() (*structs.IndexedCARoots, error) {
// generateNewLeaf does the actual work of creating a new private key,
// generating a CSR and getting it signed by the servers. result argument
// represents the last result currently in cache if any along with it's state.
// represents the last result currently in cache if any along with its state.
func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
result cache.FetchResult) (cache.FetchResult, error) {
@ -643,14 +662,15 @@ func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
// since this is only used for cache-related requests and not forwarded
// directly to any Consul servers.
type ConnectCALeafRequest struct {
Token string
Datacenter string
Service string // Service name, not ID
Agent string // Agent name, not ID
DNSSAN []string
IPSAN []net.IP
MinQueryIndex uint64
MaxQueryTime time.Duration
Token string
Datacenter string
Service string // Service name, not ID
Agent string // Agent name, not ID
DNSSAN []string
IPSAN []net.IP
MinQueryIndex uint64
MaxQueryTime time.Duration
MustRevalidate bool
structs.EnterpriseMeta
}
@ -684,10 +704,11 @@ func (req *ConnectCALeafRequest) TargetPartition() string {
func (r *ConnectCALeafRequest) CacheInfo() cache.RequestInfo {
return cache.RequestInfo{
Token: r.Token,
Key: r.Key(),
Datacenter: r.Datacenter,
MinIndex: r.MinQueryIndex,
Timeout: r.MaxQueryTime,
Token: r.Token,
Key: r.Key(),
Datacenter: r.Datacenter,
MinIndex: r.MinQueryIndex,
Timeout: r.MaxQueryTime,
MustRevalidate: r.MustRevalidate,
}
}