Fix bug in leaf-cert cache type where multiple client tokens collide (#4736)
* Fix bug in leaf-cert cache type where multiple clients with different tokens would share certs and block incorrectly
* Use a hash for the issued-cert cache key to avoid the ambiguity of concatenating the service name and token
parent 1e4c5a1811
commit c5d5dbaf96
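For context on the failure mode fixed here: the leaf-cert cache previously keyed issued certs by service name alone, so two clients watching the same service with different ACL tokens shared one entry; the second client could be handed a cert signed under the first client's token, and blocking queries then keyed off the wrong cert. A toy sketch of the before/after keying (illustrative types and values, not the real structs):

    package main

    import "fmt"

    // certKey mirrors the fix: the cache entry is identified by (service, token),
    // not by service alone. The real commit derives a string key by hashing both
    // values together (see issuedKey in the diff below).
    type certKey struct{ service, token string }

    func main() {
        // Before: keyed by service only, so client B is served the cert that
        // was signed under client A's token.
        byService := map[string]string{}
        byService["web"] = "cert-signed-with-token-A"
        fmt.Println(byService["web"]) // B gets A's cert

        // After: keyed by service and token, so each token gets its own cert.
        byServiceToken := map[certKey]string{
            {"web", "token-A"}: "cert-signed-with-token-A",
            {"web", "token-B"}: "cert-signed-with-token-B",
        }
        fmt.Println(byServiceToken[certKey{"web", "token-B"}])
    }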
@@ -1,6 +1,7 @@
 package cachetype
 
 import (
+    "crypto/sha256"
     "errors"
     "fmt"
     "sync"
@@ -27,6 +28,19 @@ type ConnectCALeaf struct {
     Cache *cache.Cache // Cache that has CA root certs via ConnectCARoot
 }
 
+// issuedKey returns the issuedCerts cache key for a given service and token. We
+// use a hash rather than concatenating strings to provide resilience against
+// user input containing our separator - both the service name and the token ID
+// can be freely manipulated by the user, so they may contain any delimiter we
+// choose. It also has the benefit of not leaking the ACL token to a new place
+// in memory where it might get accidentally dumped etc.
+func issuedKey(service, token string) string {
+    hash := sha256.New()
+    hash.Write([]byte(service))
+    hash.Write([]byte(token))
+    return fmt.Sprintf("%x", hash.Sum(nil))
+}
+
 func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) {
     var result cache.FetchResult
 
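The keying scheme above can be exercised standalone; this sketch copies the committed hashing verbatim (the service/token demo values are made up). Different tokens for the same service yield distinct fixed-length keys, and the raw ACL token never appears in the key itself:

    package main

    import (
        "crypto/sha256"
        "fmt"
    )

    func issuedKey(service, token string) string {
        hash := sha256.New()
        hash.Write([]byte(service))
        hash.Write([]byte(token))
        return fmt.Sprintf("%x", hash.Sum(nil))
    }

    func main() {
        // Same service, different tokens: the keys differ, so certs are
        // cached (and revocable) per token.
        fmt.Println(issuedKey("web", "token-A"))
        fmt.Println(issuedKey("web", "token-B"))
    }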
@@ -48,11 +62,15 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
     newRootCACh := make(chan error, 1)
     go c.waitNewRootCA(reqReal.Datacenter, newRootCACh, opts.Timeout)
 
+    // Generate a cache key to lookup/store the cert. We MUST generate a new cert
+    // per token used to ensure revocation by ACL token is robust.
+    issuedKey := issuedKey(reqReal.Service, reqReal.Token)
+
     // Get our prior cert (if we had one) and use that to determine our
     // expiration time. If no cert exists, we expire immediately since we
     // need to generate.
     c.issuedCertsLock.RLock()
-    lastCert := c.issuedCerts[reqReal.Service]
+    lastCert := c.issuedCerts[issuedKey]
     c.issuedCertsLock.RUnlock()
 
     var leafExpiryCh <-chan time.Time
@@ -62,6 +80,19 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
         if expiryDur := lastCert.ValidBefore.Sub(time.Now()); expiryDur > 0 {
             leafExpiryCh = time.After(expiryDur - 1*time.Hour)
             // TODO(mitchellh): 1 hour buffer is hardcoded above
+
+            // We should not depend on the cache package de-duplicating requests for
+            // the same service/token (which is all we care about keying our local
+            // issued cert cache on) since it might later make sense to partition
+            // clients for other reasons too. So if the request has a 0 MinIndex, and
+            // the cached cert is still valid, then the client is expecting an
+            // immediate response and hasn't already seen the cached cert; return it
+            // now.
+            if opts.MinIndex == 0 {
+                result.Value = lastCert
+                result.Index = lastCert.ModifyIndex
+                return result, nil
+            }
         }
     }
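The gate added above is the core of the blocking contract: a client presenting MinIndex == 0 has seen nothing yet and should get a still-valid cached cert immediately, while a client that already holds the current index must block. A reduced sketch of just that decision (fetchDecision is a hypothetical toy, not the real Fetch):

    package main

    import "fmt"

    // fetchDecision condenses the branch above: with a valid cached cert, only
    // a client that has not seen any index yet (MinIndex == 0) is answered from
    // cache; everyone else waits for leaf expiry or CA rotation.
    func fetchDecision(minIndex uint64, cachedValid bool) string {
        if cachedValid && minIndex == 0 {
            return "return cached cert immediately"
        }
        return "block until leaf expiry or CA rotation"
    }

    func main() {
        fmt.Println(fetchDecision(0, true)) // new client: immediate response
        fmt.Println(fetchDecision(2, true)) // up-to-date client: blocks
    }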
@@ -149,13 +180,13 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
     // check just in case.
     c.issuedCertsLock.Lock()
     defer c.issuedCertsLock.Unlock()
-    lastCert = c.issuedCerts[reqReal.Service]
+    lastCert = c.issuedCerts[issuedKey]
     if lastCert == nil || lastCert.ModifyIndex < reply.ModifyIndex {
         if c.issuedCerts == nil {
             c.issuedCerts = make(map[string]*structs.IssuedCert)
         }
 
-        c.issuedCerts[reqReal.Service] = &reply
+        c.issuedCerts[issuedKey] = &reply
         lastCert = &reply
     }
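The update above re-checks the entry while holding the write lock, so a concurrent fetch that already stored a newer cert is not clobbered. The same pattern in isolation (toy types and the hypothetical name storeIfNewer, not the real code):

    package main

    import (
        "fmt"
        "sync"
    )

    type issued struct{ ModifyIndex uint64 }

    type store struct {
        mu    sync.RWMutex
        certs map[string]*issued
    }

    // storeIfNewer mirrors the locked update above: re-read the entry under the
    // write lock and only overwrite when the reply is strictly newer.
    func (s *store) storeIfNewer(key string, reply *issued) *issued {
        s.mu.Lock()
        defer s.mu.Unlock()
        last := s.certs[key]
        if last == nil || last.ModifyIndex < reply.ModifyIndex {
            if s.certs == nil {
                s.certs = make(map[string]*issued)
            }
            s.certs[key] = reply
            last = reply
        }
        return last
    }

    func main() {
        s := &store{}
        fmt.Println(s.storeIfNewer("k", &issued{ModifyIndex: 2}).ModifyIndex) // 2
        fmt.Println(s.storeIfNewer("k", &issued{ModifyIndex: 1}).ModifyIndex) // still 2
    }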
@@ -58,6 +58,7 @@ func TestConnectCALeaf_changingRoots(t *testing.T) {
     }
 
     // Second fetch should block with set index
+    opts.MinIndex = 1
     fetchCh = TestFetchCh(t, typ, opts, req)
     select {
     case result := <-fetchCh:
@@ -156,6 +157,7 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
 
     // Third fetch should block since the cert is not expiring and
     // we also didn't update CA certs.
+    opts.MinIndex = 2
     fetchCh = TestFetchCh(t, typ, opts, req)
     select {
     case result := <-fetchCh:
@@ -164,6 +166,188 @@
     }
 }
 
+// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
+// subsequent clients get it returned immediately and don't block until it
+// expires or their request times out. Note that typically Fetches at this level
+// are de-duped by the cache higher up, but if the two clients are using
+// different ACL tokens for example (common) that may not be the case, and we
+// should still deliver correct blocking semantics to both.
+//
+// Additionally, we want to make sure that clients with different tokens
+// generate distinct certs since we might later want to revoke all certs fetched
+// with a given token but can't if a client using that token was served a cert
+// generated under a different token (say the agent token).
+func TestConnectCALeaf_multipleClientsDifferentTokens(t *testing.T) {
+    t.Parallel()
+
+    require := require.New(t)
+    rpc := TestRPC(t)
+    defer rpc.AssertExpectations(t)
+
+    typ, rootsCh := testCALeafType(t, rpc)
+    defer close(rootsCh)
+    rootsCh <- structs.IndexedCARoots{
+        ActiveRootID: "1",
+        TrustDomain:  "fake-trust-domain.consul",
+        QueryMeta:    structs.QueryMeta{Index: 1},
+    }
+
+    // Instrument ConnectCA.Sign to return a fresh, signed cert on each call.
+    var resp *structs.IssuedCert
+    var idx uint64
+    rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
+        Run(func(args mock.Arguments) {
+            reply := args.Get(2).(*structs.IssuedCert)
+            reply.CreateIndex = atomic.AddUint64(&idx, 1)
+            reply.ModifyIndex = reply.CreateIndex
+            reply.ValidBefore = time.Now().Add(12 * time.Hour)
+            resp = reply
+        })
+
+    // We'll reuse the fetch options and request
+    opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
+    reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "A-token"}
+    reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "B-token"}
+
+    // First fetch (Client A, MinIndex = 0) should return immediately
+    fetchCh := TestFetchCh(t, typ, opts, reqA)
+    var certA *structs.IssuedCert
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1,
+        }, result)
+        certA = result.(cache.FetchResult).Value.(*structs.IssuedCert)
+    }
+
+    // Second fetch (Client B, MinIndex = 0) should return immediately
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 2,
+        }, result)
+        // Different tokens should result in different certs. Note that we
+        // don't actually generate and sign real certs in this test with our
+        // mock RPC, but this is enough to be sure we actually generated a
+        // different private key for each one and they aren't just different
+        // due to index values.
+        require.NotEqual(certA.PrivateKeyPEM,
+            result.(cache.FetchResult).Value.(*structs.IssuedCert).PrivateKeyPEM)
+    }
+
+    // Third fetch (Client A, MinIndex > 0) should block
+    opts.MinIndex = 2
+    fetchCh = TestFetchCh(t, typ, opts, reqA)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+
+    // Fourth fetch (Client B, MinIndex > 0) should block
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+}
+
+// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
+// subsequent clients get it returned immediately and don't block until it
+// expires or their request times out. Note that typically Fetches at this level
+// are de-duped by the cache higher up; the test above explicitly covers the
+// case where two clients with different tokens request the same cert. Two
+// clients sharing a token _may_ share the certificate, but the cachetype should
+// not implicitly depend on the cache mechanism de-duping these clients.
+//
+// Generally we _shouldn't_ rely on implementation details in the cache package
+// about partitioning to behave correctly, as that is likely to lead to subtle
+// errors later when the implementation there changes, so this test ensures that
+// even if the cache for some reason decides to not share an existing cache
+// entry with a second client despite using the same token, we don't block
+// that client's initial request assuming it has already received the in-memory
+// and still-valid cert.
+func TestConnectCALeaf_multipleClientsSameToken(t *testing.T) {
+    t.Parallel()
+
+    require := require.New(t)
+    rpc := TestRPC(t)
+    defer rpc.AssertExpectations(t)
+
+    typ, rootsCh := testCALeafType(t, rpc)
+    defer close(rootsCh)
+    rootsCh <- structs.IndexedCARoots{
+        ActiveRootID: "1",
+        TrustDomain:  "fake-trust-domain.consul",
+        QueryMeta:    structs.QueryMeta{Index: 1},
+    }
+
+    // Instrument ConnectCA.Sign to return a fresh, signed cert on each call.
+    var resp *structs.IssuedCert
+    var idx uint64
+    rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
+        Run(func(args mock.Arguments) {
+            reply := args.Get(2).(*structs.IssuedCert)
+            reply.CreateIndex = atomic.AddUint64(&idx, 1)
+            reply.ModifyIndex = reply.CreateIndex
+            reply.ValidBefore = time.Now().Add(12 * time.Hour)
+            resp = reply
+        })
+
+    // We'll reuse the fetch options and request
+    opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
+    reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
+    reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
+
+    // First fetch (Client A, MinIndex = 0) should return immediately
+    fetchCh := TestFetchCh(t, typ, opts, reqA)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1,
+        }, result)
+    }
+
+    // Second fetch (Client B, MinIndex = 0) should return immediately
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1, // Same result as last fetch
+        }, result)
+    }
+
+    // Third fetch (Client A, MinIndex > 0) should block
+    opts.MinIndex = 1
+    fetchCh = TestFetchCh(t, typ, opts, reqA)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+
+    // Fourth fetch (Client B, MinIndex > 0) should block
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+}
+
 // testCALeafType returns a *ConnectCALeaf that is pre-configured to
 // use the given RPC implementation for "ConnectCA.Sign" operations.
 func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) {
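Both new tests fabricate signed certs by instrumenting the mocked RPC rather than running a real CA. A condensed, standalone sketch of that instrumentation pattern (assumes github.com/stretchr/testify/mock; Cert and mockRPC are stand-ins for the real structs.IssuedCert and test RPC types):

    package main

    import (
        "fmt"
        "sync/atomic"

        "github.com/stretchr/testify/mock"
    )

    // Cert stands in for structs.IssuedCert.
    type Cert struct{ ModifyIndex uint64 }

    type mockRPC struct{ mock.Mock }

    func (m *mockRPC) RPC(method string, args interface{}, reply interface{}) error {
        return m.Called(method, args, reply).Error(0)
    }

    func main() {
        var idx uint64
        m := &mockRPC{}
        // Fill the reply in place and bump a fake Raft index on every call,
        // just like the ConnectCA.Sign instrumentation in the tests above.
        m.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
            Run(func(args mock.Arguments) {
                reply := args.Get(2).(*Cert)
                reply.ModifyIndex = atomic.AddUint64(&idx, 1)
            })

        var c Cert
        _ = m.RPC("ConnectCA.Sign", nil, &c)
        fmt.Println(c.ModifyIndex) // 1
    }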