agent: tolerate partial restore failure from persistent cache (#12718)

* agent: tolerate partial restore failure from persistent cache

* Review comments: improved consistency, test robustness, comments, assertions
This commit is contained in:
Tom Proctor 2021-10-08 11:30:04 +01:00 committed by GitHub
parent 65d0718a17
commit 0180ba2984
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 132 additions and 66 deletions

View File

@ -1,4 +1,3 @@
+```release-note:improvement
+auth/kubernetes: validate JWT against the provided role on alias look ahead operations
+```
```release-note:improvement
auth/kubernetes: validate JWT against the provided role on alias look ahead operations
```

3
changelog/12718.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
agent/cache: tolerate partial restore failure from persistent cache
```

View File

@ -17,6 +17,7 @@ import (
"time"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-secure-stdlib/base62"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/command/agent/cache/cacheboltdb"
@ -969,56 +970,69 @@ func (c *LeaseCache) Flush() error {
// tokens first, since restoring a lease's renewal context and watcher requires
// looking up the token in the cachememdb.
func (c *LeaseCache) Restore(ctx context.Context, storage *cacheboltdb.BoltStorage) error {
var errors *multierror.Error
// Process tokens first
tokens, err := storage.GetByType(ctx, cacheboltdb.TokenType)
if err != nil {
return err
}
if err := c.restoreTokens(tokens); err != nil {
return err
errors = multierror.Append(errors, err)
} else {
if err := c.restoreTokens(tokens); err != nil {
errors = multierror.Append(errors, err)
}
}
// Then process auth leases
authLeases, err := storage.GetByType(ctx, cacheboltdb.AuthLeaseType)
if err != nil {
return err
}
if err := c.restoreLeases(authLeases); err != nil {
return err
errors = multierror.Append(errors, err)
} else {
if err := c.restoreLeases(authLeases); err != nil {
errors = multierror.Append(errors, err)
}
}
// Then process secret leases
secretLeases, err := storage.GetByType(ctx, cacheboltdb.SecretLeaseType)
if err != nil {
return err
}
if err := c.restoreLeases(secretLeases); err != nil {
return err
errors = multierror.Append(errors, err)
} else {
if err := c.restoreLeases(secretLeases); err != nil {
errors = multierror.Append(errors, err)
}
}
return nil
return errors.ErrorOrNil()
}
func (c *LeaseCache) restoreTokens(tokens [][]byte) error {
var errors *multierror.Error
for _, token := range tokens {
newIndex, err := cachememdb.Deserialize(token)
if err != nil {
return err
errors = multierror.Append(errors, err)
continue
}
newIndex.RenewCtxInfo = c.createCtxInfo(nil)
if err := c.db.Set(newIndex); err != nil {
return err
errors = multierror.Append(errors, err)
continue
}
c.logger.Trace("restored token", "id", newIndex.ID)
}
return nil
return errors.ErrorOrNil()
}
func (c *LeaseCache) restoreLeases(leases [][]byte) error {
var errors *multierror.Error
for _, lease := range leases {
newIndex, err := cachememdb.Deserialize(lease)
if err != nil {
return err
errors = multierror.Append(errors, err)
continue
}
// Check if this lease has already expired
@ -1031,14 +1045,17 @@ func (c *LeaseCache) restoreLeases(leases [][]byte) error {
}
if err := c.restoreLeaseRenewCtx(newIndex); err != nil {
return err
errors = multierror.Append(errors, err)
continue
}
if err := c.db.Set(newIndex); err != nil {
return err
errors = multierror.Append(errors, err)
continue
}
c.logger.Trace("restored lease", "id", newIndex.ID, "path", newIndex.RequestPath)
}
return nil
return errors.ErrorOrNil()
}
// restoreLeaseRenewCtx re-creates a RenewCtx for an index object and starts

View File

@ -16,6 +16,7 @@ import (
"github.com/go-test/deep"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/command/agent/cache/cacheboltdb"
"github.com/hashicorp/vault/command/agent/cache/cachememdb"
@ -711,13 +712,20 @@ func setupBoltStorage(t *testing.T) (tempCacheDir string, boltStorage *cachebolt
}
func TestLeaseCache_PersistAndRestore(t *testing.T) {
// Emulate 4 responses from the api proxy. The first two use the auto-auth
// token, and the last two use another token.
// Emulate responses from the api proxy. The first two use the auto-auth
// token, and the others use another token.
// The test re-sends each request to ensure that the response is cached
// so the number of responses and cacheTests specified should always be equal.
responses := []*SendResponse{
newTestSendResponse(200, `{"auth": {"client_token": "testtoken", "renewable": true, "lease_duration": 600}}`),
newTestSendResponse(201, `{"lease_id": "foo", "renewable": true, "data": {"value": "foo"}, "lease_duration": 600}`),
// The auth token will get manually deleted from the bolt DB storage, causing both of the following two responses
// to be missing from the cache after a restore, because the lease is a child of the auth token.
newTestSendResponse(202, `{"auth": {"client_token": "testtoken2", "renewable": true, "orphan": true, "lease_duration": 600}}`),
newTestSendResponse(203, `{"lease_id": "secret2-lease", "renewable": true, "data": {"number": "two"}, "lease_duration": 600}`),
// 204 No content gets special handling - avoid.
newTestSendResponse(250, `{"auth": {"client_token": "testtoken3", "renewable": true, "orphan": true, "lease_duration": 600}}`),
newTestSendResponse(251, `{"lease_id": "secret3-lease", "renewable": true, "data": {"number": "three"}, "lease_duration": 600}`),
}
tempDir, boltStorage := setupBoltStorage(t)
@ -726,59 +734,82 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) {
lc := testNewLeaseCacheWithPersistence(t, responses, boltStorage)
// Register an auto-auth token so that the token and lease requests are cached
lc.RegisterAutoAuthToken("autoauthtoken")
err := lc.RegisterAutoAuthToken("autoauthtoken")
require.NoError(t, err)
cacheTests := []struct {
token string
method string
urlPath string
body string
wantStatusCode int
token string
method string
urlPath string
body string
deleteFromPersistentStore bool // If true, will be deleted from bolt DB to induce an error on restore
expectMissingAfterRestore bool // If true, the response is not expected to be present in the restored cache
}{
{
// Make a request. A response with a new token is returned to the
// lease cache and that will be cached.
token: "autoauthtoken",
method: "GET",
urlPath: "http://example.com/v1/sample/api",
body: `{"value": "input"}`,
wantStatusCode: responses[0].Response.StatusCode,
token: "autoauthtoken",
method: "GET",
urlPath: "http://example.com/v1/sample/api",
body: `{"value": "input"}`,
},
{
// Modify the request a little bit to ensure the second response is
// returned to the lease cache.
token: "autoauthtoken",
method: "GET",
urlPath: "http://example.com/v1/sample/api",
body: `{"value": "input_changed"}`,
wantStatusCode: responses[1].Response.StatusCode,
token: "autoauthtoken",
method: "GET",
urlPath: "http://example.com/v1/sample/api",
body: `{"value": "input_changed"}`,
},
{
// Simulate an approle login to get another token
method: "PUT",
urlPath: "http://example.com/v1/auth/approle/login",
body: `{"role_id": "my role", "secret_id": "my secret"}`,
wantStatusCode: responses[2].Response.StatusCode,
method: "PUT",
urlPath: "http://example.com/v1/auth/approle-expect-missing/login",
body: `{"role_id": "my role", "secret_id": "my secret"}`,
deleteFromPersistentStore: true,
expectMissingAfterRestore: true,
},
{
// Test caching with the token acquired from the approle login
token: "testtoken2",
method: "GET",
urlPath: "http://example.com/v1/sample2/api",
body: `{"second": "input"}`,
wantStatusCode: responses[3].Response.StatusCode,
token: "testtoken2",
method: "GET",
urlPath: "http://example.com/v1/sample-expect-missing/api",
body: `{"second": "input"}`,
// This will be missing from the restored cache because its parent token was deleted
expectMissingAfterRestore: true,
},
{
// Simulate another approle login to get another token
method: "PUT",
urlPath: "http://example.com/v1/auth/approle/login",
body: `{"role_id": "my role", "secret_id": "my secret"}`,
},
{
// Test caching with the token acquired from the latest approle login
token: "testtoken3",
method: "GET",
urlPath: "http://example.com/v1/sample3/api",
body: `{"third": "input"}`,
},
}
for _, ct := range cacheTests {
var deleteIDs []string
for i, ct := range cacheTests {
// Send once to cache
sendReq := &SendRequest{
Token: ct.token,
Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)),
}
if ct.deleteFromPersistentStore {
deleteID, err := computeIndexID(sendReq)
require.NoError(t, err)
deleteIDs = append(deleteIDs, deleteID)
// Now reset the body after calculating the index
sendReq.Request = httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body))
}
resp, err := lc.Send(context.Background(), sendReq)
require.NoError(t, err)
assert.Equal(t, resp.Response.StatusCode, ct.wantStatusCode, "expected proxied response")
assert.Equal(t, responses[i].Response.StatusCode, resp.Response.StatusCode, "expected proxied response")
assert.Nil(t, resp.CacheMeta)
// Send again to test cache. If this isn't cached, the response returned
@ -789,24 +820,36 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) {
}
respCached, err := lc.Send(context.Background(), sendCacheReq)
require.NoError(t, err, "failed to send request %+v", ct)
assert.Equal(t, respCached.Response.StatusCode, ct.wantStatusCode, "expected proxied response")
assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response")
require.NotNil(t, respCached.CacheMeta)
assert.True(t, respCached.CacheMeta.Hit)
}
// Now we know the cache is working, so try restoring from the persisted
// cache's storage
restoredCache := testNewLeaseCache(t, nil)
require.NotEmpty(t, deleteIDs)
for _, deleteID := range deleteIDs {
err = boltStorage.Delete(deleteID)
require.NoError(t, err)
}
err := restoredCache.Restore(context.Background(), boltStorage)
assert.NoError(t, err)
// Now we know the cache is working, so try restoring from the persisted
// cache's storage. Responses 3 and 4 have been cleared from the cache, so
// re-send those.
restoredCache := testNewLeaseCache(t, responses[2:4])
err = restoredCache.Restore(context.Background(), boltStorage)
errors, ok := err.(*multierror.Error)
require.True(t, ok)
assert.Len(t, errors.Errors, 1)
assert.Contains(t, errors.Error(), "could not find parent Token testtoken2")
// Now compare before and after
beforeDB, err := lc.db.GetByPrefix(cachememdb.IndexNameID)
require.NoError(t, err)
assert.Len(t, beforeDB, 5)
assert.Len(t, beforeDB, 7)
for _, cachedItem := range beforeDB {
if strings.Contains(cachedItem.RequestPath, "expect-missing") {
continue
}
restoredItem, err := restoredCache.db.Get(cachememdb.IndexNameID, cachedItem.ID)
require.NoError(t, err)
@ -838,17 +881,21 @@ func TestLeaseCache_PersistAndRestore(t *testing.T) {
assert.Len(t, afterDB, 5)
// And finally send the cache requests once to make sure they're all being
// served from the restoredCache
for _, ct := range cacheTests {
// served from the restoredCache unless they were intended to be missing after restore.
for i, ct := range cacheTests {
sendCacheReq := &SendRequest{
Token: ct.token,
Request: httptest.NewRequest(ct.method, ct.urlPath, strings.NewReader(ct.body)),
}
respCached, err := restoredCache.Send(context.Background(), sendCacheReq)
require.NoError(t, err, "failed to send request %+v", ct)
assert.Equal(t, respCached.Response.StatusCode, ct.wantStatusCode, "expected proxied response")
require.NotNil(t, respCached.CacheMeta)
assert.True(t, respCached.CacheMeta.Hit)
assert.Equal(t, responses[i].Response.StatusCode, respCached.Response.StatusCode, "expected proxied response")
if ct.expectMissingAfterRestore {
require.Nil(t, respCached.CacheMeta)
} else {
require.NotNil(t, respCached.CacheMeta)
assert.True(t, respCached.CacheMeta.Hit)
}
}
}