diff --git a/vault/expiration.go b/vault/expiration.go index f0f885edb..73b47b409 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -6,14 +6,18 @@ import ( "path" "strings" "sync" + "sync/atomic" "time" "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" + "github.com/hashicorp/errwrap" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/helper/consts" "github.com/hashicorp/vault/helper/jsonutil" + "github.com/hashicorp/vault/helper/locksutil" "github.com/hashicorp/vault/logical" ) @@ -57,6 +61,8 @@ type ExpirationManager struct { pending map[string]*time.Timer pendingLock sync.Mutex + + tidyLock int64 } // NewExpirationManager creates a new ExpirationManager that is backed @@ -114,6 +120,113 @@ func (c *Core) stopExpiration() error { return nil } +// Tidy cleans up the dangling storage entries for leases. It scans the storage +// view to find all the available leases, checks if the token embedded in it is +// either empty or invalid and in both the cases, it revokes them. It also uses +// a token cache to avoid multiple lookups of the same token ID. It is normally +// not required to use the API that invokes this. This is only intended to +// clean up the corrupt storage due to bugs. 
+func (m *ExpirationManager) Tidy() error { + var tidyErrors *multierror.Error + + if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { + m.logger.Warn("expiration: tidy operation on leases is already in progress") + return fmt.Errorf("tidy operation on leases is already in progress") + } + + defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) + + m.logger.Info("expiration: beginning tidy operation on leases") + defer m.logger.Info("expiration: finished tidy operation on leases") + + // Create a cache to keep track of looked up tokens + tokenCache := make(map[string]bool) + var countLease, revokedCount, deletedCountInvalidToken, deletedCountEmptyToken int64 + + tidyFunc := func(leaseID string) { + countLease++ + if countLease%500 == 0 { + m.logger.Info("expiration: tidying leases", "progress", countLease) + } + + le, err := m.loadEntry(leaseID) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err)) + return + } + + if le == nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("nil entry for lease ID %q", leaseID)) + return + } + + var isValid, ok bool + revokeLease := false + if le.ClientToken == "" { + m.logger.Trace("expiration: revoking lease which has an empty token", "lease_id", leaseID) + revokeLease = true + deletedCountEmptyToken++ + goto REVOKE_CHECK + } + + isValid, ok = tokenCache[le.ClientToken] + if !ok { + saltedID := m.tokenStore.SaltID(le.ClientToken) + lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) + lock.RLock() + te, err := m.tokenStore.lookupSalted(saltedID, true) + lock.RUnlock() + + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) + return + } + + if te == nil { + m.logger.Trace("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) + revokeLease = true + deletedCountInvalidToken++ + tokenCache[le.ClientToken] = false + } else { + 
tokenCache[le.ClientToken] = true + } + goto REVOKE_CHECK + } else { + if isValid { + return + } else { + m.logger.Trace("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) + revokeLease = true + deletedCountInvalidToken++ + } + goto REVOKE_CHECK + } + + REVOKE_CHECK: + if revokeLease { + // Force the revocation and skip going through the token store + // again + err = m.revokeCommon(leaseID, true, true) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err)) + return + } + revokedCount++ + } + } + + if err := logical.ScanView(m.idView, tidyFunc); err != nil { + return err + } + + m.logger.Debug("expiration: number of leases scanned", "count", countLease) + m.logger.Debug("expiration: number of leases which had empty tokens", "count", deletedCountEmptyToken) + m.logger.Debug("expiration: number of leases which had invalid tokens", "count", deletedCountInvalidToken) + m.logger.Debug("expiration: number of leases successfully revoked", "count", revokedCount) + + return tidyErrors.ErrorOrNil() +} + // Restore is used to recover the lease states when starting. // This is used after starting the vault. func (m *ExpirationManager) Restore() error { @@ -498,8 +611,13 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke // Register is used to take a request and response with an associated // lease. The secret gets assigned a LeaseID and the management of // of lease is assumed by the expiration manager. 
-func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (string, error) { +func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) { defer metrics.MeasureSince([]string{"expire", "register"}, time.Now()) + + if req.ClientToken == "" { + return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") + } + // Ignore if there is no leased secret if resp == nil || resp.Secret == nil { return "", nil @@ -515,8 +633,34 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons if err != nil { return "", err } + + leaseID := path.Join(req.Path, leaseUUID) + + defer func() { + // If there is an error we want to rollback as much as possible (note + // that errors here are ignored to do as much cleanup as we can). We + // want to revoke a generated secret (since an error means we may not + // be successfully tracking it), remove indexes, and delete the entry. + if retErr != nil { + revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) + if err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional internal error was encountered revoking the newly-generated secret: {{err}}", err)) + } else if revResp != nil && revResp.IsError() { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error())) + } + + if err := m.deleteEntry(leaseID); err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered deleting any lease associated with the newly-generated secret: {{err}}", err)) + } + + if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err)) + } + } + }() + le := leaseEntry{ - LeaseID: 
path.Join(req.Path, leaseUUID), + LeaseID: leaseID, ClientToken: req.ClientToken, Path: req.Path, Data: resp.Data, @@ -548,6 +692,10 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons func (m *ExpirationManager) RegisterAuth(source string, auth *logical.Auth) error { defer metrics.MeasureSince([]string{"expire", "register-auth"}, time.Now()) + if auth.ClientToken == "" { + return fmt.Errorf("expiration: cannot register an auth lease with an empty token") + } + // Create a lease entry le := leaseEntry{ LeaseID: path.Join(source, m.tokenStore.SaltID(auth.ClientToken)), @@ -668,7 +816,7 @@ func (m *ExpirationManager) revokeEntry(le *leaseEntry) error { // Revocation of login tokens is special since we can by-pass the // backend and directly interact with the token store if le.Auth != nil { - if err := m.tokenStore.RevokeTree(le.Auth.ClientToken); err != nil { + if err := m.tokenStore.RevokeTree(le.ClientToken); err != nil { return fmt.Errorf("failed to revoke token: %v", err) } diff --git a/vault/expiration_test.go b/vault/expiration_test.go index ced6b4231..7598611d9 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -33,6 +33,211 @@ func mockBackendExpiration(t testing.TB, backend physical.Backend) (*Core, *Expi return c, ts.expiration } +func TestExpiration_Tidy(t *testing.T) { + var err error + + exp := mockExpiration(t) + + // Set up a count function to calculate number of leases + count := 0 + countFunc := func(leaseID string) { + count++ + } + + // Scan the storage with the count func set + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that there are no leases to begin with + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + // Create a lease entry without a client token in it + le := &leaseEntry{ + LeaseID: "lease/with/no/client/token", + Path: "foo/bar", + } + + // Persist the invalid lease entry + if err = 
exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that the storage was successful and that the count of leases is + // now 1 + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } + + // Run the tidy operation + err = exp.Tidy() + if err != nil { + t.Fatal(err) + } + + count = 0 + if err := logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + // Set a revoked/invalid token in the lease entry + le.ClientToken = "invalidtoken" + + // Persist the invalid lease entry + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that the storage was successful and that the count of leases is + // now 1 + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } + + // Run the tidy operation + err = exp.Tidy() + if err != nil { + t.Fatal(err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + // Attach an invalid token with 2 leases + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + le.LeaseID = "another/invalid/lease" + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + // Run the tidy operation + err = exp.Tidy() + if err != nil { + t.Fatal(err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + 
// Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + for i := 0; i < 1000; i++ { + req := &logical.Request{ + Operation: logical.ReadOperation, + Path: "invalid/lease/" + fmt.Sprintf("%d", i+1), + ClientToken: "invalidtoken", + } + resp := &logical.Response{ + Secret: &logical.Secret{ + LeaseOptions: logical.LeaseOptions{ + TTL: 100 * time.Millisecond, + }, + }, + Data: map[string]interface{}{ + "test_key": "test_value", + }, + } + _, err := exp.Register(req, resp) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that there are 1000 leases now + if count != 1000 { + t.Fatalf("bad: lease count; expected:1000 actual:%d", count) + } + + errCh1 := make(chan error) + errCh2 := make(chan error) + + // Initiate tidy of the above 1000 invalid leases in quick succession. Only + // one tidy operation can be in flight at any time. One of these requests + // should error out. 
+ go func() { + errCh1 <- exp.Tidy() + }() + + go func() { + errCh2 <- exp.Tidy() + }() + + var err1, err2 error + + for i := 0; i < 2; i++ { + select { + case err1 = <-errCh1: + case err2 = <-errCh2: + } + } + + if !(err1 != nil && err1.Error() == "tidy operation on leases is already in progress") && + !(err2 != nil && err2.Error() == "tidy operation on leases is already in progress") { + t.Fatalf("expected at least one of err1 or err2 to be set; err1: %#v\n err2:%#v\n", err1, err2) + } + + root, err := exp.tokenStore.rootToken() + if err != nil { t.Fatal(err) } + le.ClientToken = root.ID + // Attach a valid token with the leases + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + // Run the tidy operation + err = exp.Tidy() + if err != nil { + t.Fatal(err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Post the tidy operation, the valid lease entry should not get affected + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } +} + func BenchmarkExpiration_Restore_Etcd(b *testing.B) { addr := os.Getenv("PHYSICAL_BACKEND_BENCHMARK_ADDR") randPath := fmt.Sprintf("vault-%d/", time.Now().Unix()) @@ -145,8 +350,9 @@ func TestExpiration_Restore(t *testing.T) { } for _, path := range paths { req := &logical.Request{ - Operation: logical.ReadOperation, - Path: path, + Operation: logical.ReadOperation, + Path: path, + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -200,8 +406,9 @@ func TestExpiration_Register(t *testing.T) { exp := mockExpiration(t) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -299,8 +506,9 @@ func TestExpiration_Revoke(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, 
view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -341,8 +549,9 @@ func TestExpiration_RevokeOnExpire(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -400,8 +609,9 @@ func TestExpiration_RevokePrefix(t *testing.T) { } for _, path := range paths { req := &logical.Request{ - Operation: logical.ReadOperation, - Path: path, + Operation: logical.ReadOperation, + Path: path, + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -588,8 +798,9 @@ func TestExpiration_Renew(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -654,8 +865,9 @@ func TestExpiration_Renew_NotRenewable(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -700,8 +912,9 @@ func TestExpiration_Renew_RevokeOnExpire(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ diff 
--git a/vault/logical_system.go b/vault/logical_system.go index 4a7f28fb7..98aef6275 100644 --- a/vault/logical_system.go +++ b/vault/logical_system.go @@ -423,6 +423,17 @@ func NewSystemBackend(core *Core, config *logical.BackendConfig) (logical.Backen HelpDescription: strings.TrimSpace(sysHelp["revoke-prefix"][1]), }, + &framework.Path{ + Pattern: "leases/tidy$", + + Callbacks: map[logical.Operation]framework.OperationFunc{ + logical.UpdateOperation: b.handleTidyLeases, + }, + + HelpSynopsis: strings.TrimSpace(sysHelp["tidy_leases"][0]), + HelpDescription: strings.TrimSpace(sysHelp["tidy_leases"][1]), + }, + &framework.Path{ Pattern: "auth$", @@ -798,6 +809,15 @@ type SystemBackend struct { Backend *framework.Backend } +func (b *SystemBackend) handleTidyLeases(req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + err := b.Core.expiration.Tidy() + if err != nil { + b.Backend.Logger().Error("sys: failed to tidy leases", "error", err) + return handleError(err) + } + return nil, nil +} + func (b *SystemBackend) invalidate(key string) { if b.Core.logger.IsTrace() { b.Core.logger.Trace("sys: invaliding key", "key", key) @@ -973,7 +993,7 @@ func (b *SystemBackend) handleCapabilitiesAccessor(req *logical.Request, d *fram return logical.ErrorResponse("missing accessor"), nil } - aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor) + aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor, false) if err != nil { return nil, err } @@ -2593,6 +2613,15 @@ Enable a new audit backend or disable an existing backend. on a given path.`, }, + "tidy_leases": { + `This endpoint performs cleanup tasks that can be run if certain error +conditions have occurred.`, + `This endpoint performs cleanup tasks that can be run to clean up the +lease entries after certain error conditions. 
Usually running this is not +necessary, and is only required if upgrade notes or support personnel suggest +it.`, + }, + "wrap": { "Response-wraps an arbitrary JSON object.", `Round trips the given input data into a response-wrapped token.`, diff --git a/vault/request_handling.go b/vault/request_handling.go index 1326ef518..7bc331d59 100644 --- a/vault/request_handling.go +++ b/vault/request_handling.go @@ -289,10 +289,11 @@ func (c *Core) handleRequest(req *logical.Request) (retResp *logical.Response, r if err != nil { c.logger.Error("core: failed to look up token", "error", err) retErr = multierror.Append(retErr, ErrInternalError) - return nil, nil, retErr + return nil, auth, retErr } if err := c.expiration.RegisterAuth(te.Path, resp.Auth); err != nil { + c.tokenStore.Revoke(te.ID) c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err) retErr = multierror.Append(retErr, ErrInternalError) return nil, auth, retErr @@ -440,6 +441,7 @@ func (c *Core) handleLoginRequest(req *logical.Request) (*logical.Response, *log // Register with the expiration manager if err := c.expiration.RegisterAuth(te.Path, auth); err != nil { + c.tokenStore.Revoke(te.ID) c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err) return nil, auth, ErrInternalError } diff --git a/vault/token_store.go b/vault/token_store.go index 46614ed62..499d19b02 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -3,10 +3,14 @@ package vault import ( "encoding/json" "fmt" + "sync/atomic" + "regexp" "strings" "time" + log "github.com/mgutz/logxi/v1" + "github.com/armon/go-metrics" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" @@ -90,6 +94,10 @@ type TokenStore struct { tokenLocks []*locksutil.LockEntry cubbyholeDestroyer func(*TokenStore, string) error + + logger log.Logger + + tidyLock int64 } // NewTokenStore is used to construct a token store that is @@ -102,14 +110,14 @@ func NewTokenStore(c 
*Core, config *logical.BackendConfig) (*TokenStore, error) t := &TokenStore{ view: view, cubbyholeDestroyer: destroyCubbyhole, + logger: c.logger, + tokenLocks: locksutil.CreateLocks(), } if c.policyStore != nil { t.policyLookupFunc = c.policyStore.GetPolicy } - t.tokenLocks = locksutil.CreateLocks() - // Setup the framework endpoints t.Backend = &framework.Backend{ AuthRenew: t.authRenew, @@ -610,7 +618,7 @@ func (ts *TokenStore) tokenStoreAccessorList( ret := make([]string, 0, len(entries)) for _, entry := range entries { - aEntry, err := ts.lookupBySaltedAccessor(entry) + aEntry, err := ts.lookupBySaltedAccessor(entry, false) if err != nil { resp.AddWarning("Found an accessor entry that could not be successfully decoded") continue @@ -1064,11 +1072,11 @@ func (ts *TokenStore) handleCreateAgainstRole( return ts.handleCreateCommon(req, d, false, roleEntry) } -func (ts *TokenStore) lookupByAccessor(accessor string) (accessorEntry, error) { - return ts.lookupBySaltedAccessor(ts.SaltID(accessor)) +func (ts *TokenStore) lookupByAccessor(accessor string, tainted bool) (accessorEntry, error) { + return ts.lookupBySaltedAccessor(ts.SaltID(accessor), tainted) } -func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEntry, error) { +func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string, tainted bool) (accessorEntry, error) { entry, err := ts.view.Get(accessorPrefix + saltedAccessor) var aEntry accessorEntry @@ -1082,8 +1090,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt err = jsonutil.DecodeJSON(entry.Value, &aEntry) // If we hit an error, assume it's a pre-struct straight token ID if err != nil { - aEntry.TokenID = string(entry.Value) - te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID), false) + te, err := ts.lookupSalted(ts.SaltID(string(entry.Value)), tainted) if err != nil { return accessorEntry{}, fmt.Errorf("failed to look up token using accessor index: %s", err) } @@ -1093,6 +1100,7 @@ 
func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt // on lookup is nil, not an error, so we keep that behavior here to be // safe...the token ID is simply not filled in. if te != nil { + aEntry.TokenID = te.ID aEntry.AccessorID = te.Accessor } } @@ -1103,49 +1111,80 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt // handleTidy handles the cleaning up of leaked accessor storage entries and // cleaning up of leases that are associated to tokens that are expired. func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData) (*logical.Response, error) { + var tidyErrors *multierror.Error + + if !atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { + ts.logger.Warn("token: tidy operation on tokens is already in progress") + return nil, fmt.Errorf("tidy operation on tokens is already in progress") + } + + defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) + + ts.logger.Debug("token: beginning tidy operation on tokens") + defer ts.logger.Debug("token: finished tidy operation on tokens") + // List out all the accessors saltedAccessorList, err := ts.view.List(accessorPrefix) if err != nil { - return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) + return nil, fmt.Errorf("failed to fetch accessor index entries: %v", err) } - var tidyErrors *multierror.Error - // First, clean up secondary index entries that are no longer valid parentList, err := ts.view.List(parentPrefix) if err != nil { return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) } + var countParentList, deletedCountParentList int64 + // Scan through the secondary index entries; if there is an entry // with the token's salt ID at the end, remove it for _, parent := range parentList { - children, err := ts.view.List(parentPrefix + parent + "/") + children, err := ts.view.List(parentPrefix + parent) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index 
entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index: %v", err)) continue } for _, child := range children { + countParentList++ + if countParentList%500 == 0 { + ts.logger.Info("token: checking validity of tokens in secondary index list", "progress", countParentList) + } + // Look up tainted entries so we can be sure that if this isn't - // found, it doesn't exist + // found, it doesn't exist. Doing the following without locking + // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - err = ts.view.Delete(parentPrefix + parent + "/" + child) + index := parentPrefix + parent + child + ts.logger.Trace("token: deleting invalid secondary index", "index", index) + err = ts.view.Delete(index) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index: %v", err)) } + deletedCountParentList++ } } } + var countAccessorList, + deletedCountAccessorEmptyToken, + deletedCountAccessorInvalidToken, + deletedCountInvalidTokenInAccessor int64 + // For each of the accessor, see if the token ID associated with it is // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. 
for _, saltedAccessor := range saltedAccessorList { - accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) + countAccessorList++ + if countAccessorList%500 == 0 { + ts.logger.Info("token: checking if accessors contain valid tokens", "progress", countAccessorList) + } + + accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor, true) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor index: %v", err)) continue } @@ -1153,25 +1192,32 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { + index := accessorPrefix + saltedAccessor // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation - err = ts.view.Delete(accessorPrefix + saltedAccessor) + err = ts.view.Delete(index) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor index: %v", err)) continue } + deletedCountAccessorEmptyToken++ } - saltedId := ts.SaltID(accessorEntry.TokenID) + lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) + lock.RLock() // Look up tainted variants so we only find entries that truly don't // exist + saltedId := ts.SaltID(accessorEntry.TokenID) te, err := ts.lookupSalted(saltedId, true) + lock.RUnlock() // If token entry is not found assume that the token is not valid any // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { + ts.logger.Info("token: deleting token with nil entry", "salted_token", saltedId) + // RevokeByToken expects a '*TokenEntry'. 
For the // purposes of tidying, it is sufficient if the token // entry only has ID set. @@ -1186,26 +1232,31 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) continue } + deletedCountInvalidTokenInAccessor++ + + index := accessorPrefix + saltedAccessor // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on // next run if something above failed we still have the accessor // entry to try again. - err = ts.view.Delete(accessorPrefix + saltedAccessor) + err = ts.view.Delete(index) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) continue } + deletedCountAccessorInvalidToken++ } } - // Later request handling code seems to check if the type is multierror so - // if we haven't added any errors we need to just return a normal nil error - if tidyErrors == nil { - return nil, nil - } + ts.logger.Debug("token: number of tokens scanned in parent index list", "count", countParentList) + ts.logger.Debug("token: number of tokens revoked in parent index list", "count", deletedCountParentList) + ts.logger.Debug("token: number of accessors scanned", "count", countAccessorList) + ts.logger.Debug("token: number of deleted accessors which had empty tokens", "count", deletedCountAccessorEmptyToken) + ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "count", deletedCountInvalidTokenInAccessor) + ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "count", deletedCountAccessorInvalidToken) - return nil, tidyErrors + return nil, tidyErrors.ErrorOrNil() } // handleUpdateLookupAccessor handles the auth/token/lookup-accessor path for returning @@ -1221,7 +1272,7 @@ func (ts *TokenStore) handleUpdateLookupAccessor(req *logical.Request, data 
*fra urlaccessor = true } - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { return nil, err } @@ -1275,7 +1326,7 @@ func (ts *TokenStore) handleUpdateRevokeAccessor(req *logical.Request, data *fra urlaccessor = true } - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, true) if err != nil { return nil, err } diff --git a/vault/token_store_test.go b/vault/token_store_test.go index 7a84fe737..d802080d1 100644 --- a/vault/token_store_test.go +++ b/vault/token_store_test.go @@ -240,7 +240,7 @@ func TestTokenStore_AccessorIndex(t *testing.T) { t.Fatalf("bad: %#v", out) } - aEntry, err := ts.lookupByAccessor(out.Accessor) + aEntry, err := ts.lookupByAccessor(out.Accessor, false) if err != nil { t.Fatalf("err: %s", err) } @@ -318,7 +318,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) { // Test upgrade from old struct method of accessor storage (of token id) for _, accessor := range keys { - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { t.Fatal(err) } @@ -352,7 +352,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) { } for _, accessor := range keys2 { - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { t.Fatal(err) }