Fix bug in leaf-cert cache type where multiple client tokens collide (#4736)

* Fix bug in leaf-cert cache type where multiple clients with different tokens would share certs and block incorrectly

* Use hash for issued certs key to avoid ambiguity concatenating
This commit is contained in:
Paul Banks 2018-10-03 21:13:28 +01:00
parent 1e4c5a1811
commit c5d5dbaf96
2 changed files with 218 additions and 3 deletions

View File

@ -1,6 +1,7 @@
package cachetype
import (
"crypto/sha256"
"errors"
"fmt"
"sync"
@ -27,6 +28,19 @@ type ConnectCALeaf struct {
Cache *cache.Cache // Cache that has CA root certs via ConnectCARoot
}
// issuedKey returns the issuedCerts cache key for a given service and token. We
// use a hash rather than concatenating strings to provide resilience against
// user input containing our separator - both service name and token ID can be
// freely manipulated by user so may contain any delimiter we choose. It also
// has the benefit of not leaking the ACL token to a new place in memory it
// might get accidentally dumped etc.
func issuedKey(service, token string) string {
hash := sha256.New()
hash.Write([]byte(service))
hash.Write([]byte(token))
return fmt.Sprintf("%x", hash.Sum(nil))
}
func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) {
var result cache.FetchResult
@ -48,11 +62,15 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
newRootCACh := make(chan error, 1)
go c.waitNewRootCA(reqReal.Datacenter, newRootCACh, opts.Timeout)
// Generate a cache key to lookup/store the cert. We MUST generate a new cert
// per token used to ensure revocation by ACL token is robust.
issuedKey := issuedKey(reqReal.Service, reqReal.Token)
// Get our prior cert (if we had one) and use that to determine our
// expiration time. If no cert exists, we expire immediately since we
// need to generate.
c.issuedCertsLock.RLock()
lastCert := c.issuedCerts[reqReal.Service]
lastCert := c.issuedCerts[issuedKey]
c.issuedCertsLock.RUnlock()
var leafExpiryCh <-chan time.Time
@ -62,6 +80,19 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
if expiryDur := lastCert.ValidBefore.Sub(time.Now()); expiryDur > 0 {
leafExpiryCh = time.After(expiryDur - 1*time.Hour)
// TODO(mitchellh): 1 hour buffer is hardcoded above
// We should not depend on the cache package de-duplicating requests for
// the same service/token (which is all we care about keying our local
// issued cert cache on) since it might later make sense to partition
// clients for other reasons too. So if the request has a 0 MinIndex, and
// the cached cert is still valid, then the client is expecting an
// immediate response and hasn't already seen the cached cert, return it
// now.
if opts.MinIndex == 0 {
result.Value = lastCert
result.Index = lastCert.ModifyIndex
return result, nil
}
}
}
@ -149,13 +180,13 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
// check just in case.
c.issuedCertsLock.Lock()
defer c.issuedCertsLock.Unlock()
lastCert = c.issuedCerts[reqReal.Service]
lastCert = c.issuedCerts[issuedKey]
if lastCert == nil || lastCert.ModifyIndex < reply.ModifyIndex {
if c.issuedCerts == nil {
c.issuedCerts = make(map[string]*structs.IssuedCert)
}
c.issuedCerts[reqReal.Service] = &reply
c.issuedCerts[issuedKey] = &reply
lastCert = &reply
}

View File

@ -58,6 +58,7 @@ func TestConnectCALeaf_changingRoots(t *testing.T) {
}
// Second fetch should block with set index
opts.MinIndex = 1
fetchCh = TestFetchCh(t, typ, opts, req)
select {
case result := <-fetchCh:
@ -156,6 +157,7 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
// Third fetch should block since the cert is not expiring and
// we also didn't update CA certs.
opts.MinIndex = 2
fetchCh = TestFetchCh(t, typ, opts, req)
select {
case result := <-fetchCh:
@ -164,6 +166,188 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
}
}
// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
// that subsequent clients get it returned immediately and don't block until it
// expires or their request times out. Note that typically FEtches at this level
// are de-duped by the cache higher up, but if the two clients are using
// different ACL tokens for example (common) that may not be the case, and we
// should wtill deliver correct blocking semantics to both.
//
// Additionally, we want to make sure that clients with different tokens
// generate distinct certs since we might later want to revoke all certs fetched
// with a given token but can't if a client using that token was served a cert
// generated under a different token (say the agent token).
func TestConnectCALeaf_multipleClientsDifferentTokens(t *testing.T) {
t.Parallel()
require := require.New(t)
rpc := TestRPC(t)
defer rpc.AssertExpectations(t)
typ, rootsCh := testCALeafType(t, rpc)
defer close(rootsCh)
rootsCh <- structs.IndexedCARoots{
ActiveRootID: "1",
TrustDomain: "fake-trust-domain.consul",
QueryMeta: structs.QueryMeta{Index: 1},
}
// Instrument ConnectCA.Sign to
var resp *structs.IssuedCert
var idx uint64
rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
Run(func(args mock.Arguments) {
reply := args.Get(2).(*structs.IssuedCert)
reply.CreateIndex = atomic.AddUint64(&idx, 1)
reply.ModifyIndex = reply.CreateIndex
reply.ValidBefore = time.Now().Add(12 * time.Hour)
resp = reply
})
// We'll reuse the fetch options and request
opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "A-token"}
reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "B-token"}
// First fetch (Client A, MinIndex = 0) should return immediately
fetchCh := TestFetchCh(t, typ, opts, reqA)
var certA *structs.IssuedCert
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1,
}, result)
certA = result.(cache.FetchResult).Value.(*structs.IssuedCert)
}
// Second fetch (Client B, MinIndex = 0) should return immediately
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 2,
}, result)
// Different tokens should result in different certs. Note that we don't
// actually generate and sign real certs in this test with our mock RPC but
// this is enough to be sure we actually generated a different Private Key
// for each one and aren't just differnt due to index values.
require.NotEqual(certA.PrivateKeyPEM,
result.(cache.FetchResult).Value.(*structs.IssuedCert).PrivateKeyPEM)
}
// Third fetch (Client A, MinIndex = > 0) should block
opts.MinIndex = 2
fetchCh = TestFetchCh(t, typ, opts, reqA)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
// Fourth fetch (Client B, MinIndex = > 0) should block
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
}
// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
// that subsequent clients get it returned immediately and don't block until it
// expires or their request times out. Note that typically Fetches at this level
// are de-duped by the cache higher up, the test above explicitly tests the case
// where two clients with different tokens request the same cert. However two
// clients sharing a token _may_ share the certificate, but the cachetype should
// not implicitly depend on the cache mechanism de-duping these clients.
//
// Genrally we _shouldn't_ rely on implementation details in the cache package
// about partitioning to behave correctly as that is likely to lead to subtle
// errors later when the implementation there changes, so this test ensures that
// even if the cache for some reason decides to not share an existing cache
// entry with a second client despite using the same token, that we don't block
// it's initial request assuming that it's already recieved the in-memory and
// still valid cert.
func TestConnectCALeaf_multipleClientsSameToken(t *testing.T) {
t.Parallel()
require := require.New(t)
rpc := TestRPC(t)
defer rpc.AssertExpectations(t)
typ, rootsCh := testCALeafType(t, rpc)
defer close(rootsCh)
rootsCh <- structs.IndexedCARoots{
ActiveRootID: "1",
TrustDomain: "fake-trust-domain.consul",
QueryMeta: structs.QueryMeta{Index: 1},
}
// Instrument ConnectCA.Sign to
var resp *structs.IssuedCert
var idx uint64
rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
Run(func(args mock.Arguments) {
reply := args.Get(2).(*structs.IssuedCert)
reply.CreateIndex = atomic.AddUint64(&idx, 1)
reply.ModifyIndex = reply.CreateIndex
reply.ValidBefore = time.Now().Add(12 * time.Hour)
resp = reply
})
// We'll reuse the fetch options and request
opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
// First fetch (Client A, MinIndex = 0) should return immediately
fetchCh := TestFetchCh(t, typ, opts, reqA)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1,
}, result)
}
// Second fetch (Client B, MinIndex = 0) should return immediately
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1, // Same result as last fetch
}, result)
}
// Third fetch (Client A, MinIndex = > 0) should block
opts.MinIndex = 1
fetchCh = TestFetchCh(t, typ, opts, reqA)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
// Fourth fetch (Client B, MinIndex = > 0) should block
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
}
// testCALeafType returns a *ConnectCALeaf that is pre-configured to
// use the given RPC implementation for "ConnectCA.Sign" operations.
func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) {