Merge pull request #2452 from hashicorp/sys-tidy-leases

Cleaning of stale leases
2017-05-05 10:54:25 -04:00 · 2017-05-05 10:54:25 -04:00 · 5160a13818
parent 61f115ba81 55ef4f2566
commit 5160a13818
6 changed files with 497 additions and 54 deletions
--- a/vault/expiration.go
+++ b/vault/expiration.go
@ -6,14 +6,18 @@ import (
 	"path"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/armon/go-metrics"
 	log "github.com/mgutz/logxi/v1"

+	"github.com/hashicorp/errwrap"
+	multierror "github.com/hashicorp/go-multierror"
 	"github.com/hashicorp/go-uuid"
 	"github.com/hashicorp/vault/helper/consts"
 	"github.com/hashicorp/vault/helper/jsonutil"
+	"github.com/hashicorp/vault/helper/locksutil"
 	"github.com/hashicorp/vault/logical"
 )

@ -57,6 +61,8 @@ type ExpirationManager struct {

 	pending     map[string]*time.Timer
 	pendingLock sync.Mutex
+
+	tidyLock int64
 }

 // NewExpirationManager creates a new ExpirationManager that is backed
@ -114,6 +120,113 @@ func (c *Core) stopExpiration() error {
 	return nil
 }

+// Tidy cleans up the dangling storage entries for leases. It scans the storage
+// view to find all the available leases, checks if the token embedded in it is
+// either empty or invalid and in both the cases, it revokes them. It also uses
+// a token cache to avoid multiple lookups of the same token ID. It is normally
+// not required to use the API that invokes this. This is only intended to
+// clean up the corrupt storage due to bugs.
+func (m *ExpirationManager) Tidy() error {
+	var tidyErrors *multierror.Error
+
+	if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) {
+		m.logger.Warn("expiration: tidy operation on leases is already in progress")
+		return fmt.Errorf("tidy operation on leases is already in progress")
+	}
+
+	defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0)
+
+	m.logger.Info("expiration: beginning tidy operation on leases")
+	defer m.logger.Info("expiration: finished tidy operation on leases")
+
+	// Create a cache to keep track of looked up tokens
+	tokenCache := make(map[string]bool)
+	var countLease, revokedCount, deletedCountInvalidToken, deletedCountEmptyToken int64
+
+	tidyFunc := func(leaseID string) {
+		countLease++
+		if countLease%500 == 0 {
+			m.logger.Info("expiration: tidying leases", "progress", countLease)
+		}
+
+		le, err := m.loadEntry(leaseID)
+		if err != nil {
+			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err))
+			return
+		}
+
+		if le == nil {
+			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("nil entry for lease ID %q: %v", leaseID, err))
+			return
+		}
+
+		var isValid, ok bool
+		revokeLease := false
+		if le.ClientToken == "" {
+			m.logger.Trace("expiration: revoking lease which has an empty token", "lease_id", leaseID)
+			revokeLease = true
+			deletedCountEmptyToken++
+			goto REVOKE_CHECK
+		}
+
+		isValid, ok = tokenCache[le.ClientToken]
+		if !ok {
+			saltedID := m.tokenStore.SaltID(le.ClientToken)
+			lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken)
+			lock.RLock()
+			te, err := m.tokenStore.lookupSalted(saltedID, true)
+			lock.RUnlock()
+
+			if err != nil {
+				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err))
+				return
+			}
+
+			if te == nil {
+				m.logger.Trace("expiration: revoking lease which holds an invalid token", "lease_id", leaseID)
+				revokeLease = true
+				deletedCountInvalidToken++
+				tokenCache[le.ClientToken] = false
+			} else {
+				tokenCache[le.ClientToken] = true
+			}
+			goto REVOKE_CHECK
+		} else {
+			if isValid {
+				return
+			} else {
+				m.logger.Trace("expiration: revoking lease which contains an invalid token", "lease_id", leaseID)
+				revokeLease = true
+				deletedCountInvalidToken++
+			}
+			goto REVOKE_CHECK
+		}
+
+	REVOKE_CHECK:
+		if revokeLease {
+			// Force the revocation and skip going through the token store
+			// again
+			err = m.revokeCommon(leaseID, true, true)
+			if err != nil {
+				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err))
+				return
+			}
+			revokedCount++
+		}
+	}
+
+	if err := logical.ScanView(m.idView, tidyFunc); err != nil {
+		return err
+	}
+
+	m.logger.Debug("expiration: number of leases scanned", "count", countLease)
+	m.logger.Debug("expiration: number of leases which had empty tokens", "count", deletedCountEmptyToken)
+	m.logger.Debug("expiration: number of leases which had invalid tokens", "count", deletedCountInvalidToken)
+	m.logger.Debug("expiration: number of leases successfully revoked", "count", revokedCount)
+
+	return tidyErrors.ErrorOrNil()
+}
+
 // Restore is used to recover the lease states when starting.
 // This is used after starting the vault.
 func (m *ExpirationManager) Restore() error {
@ -498,8 +611,13 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke
 // Register is used to take a request and response with an associated
 // lease. The secret gets assigned a LeaseID and the management of
 // of lease is assumed by the expiration manager.
-func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (string, error) {
+func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) {
 	defer metrics.MeasureSince([]string{"expire", "register"}, time.Now())
+
+	if req.ClientToken == "" {
+		return "", fmt.Errorf("expiration: cannot register a lease with an empty client token")
+	}
+
 	// Ignore if there is no leased secret
 	if resp == nil || resp.Secret == nil {
 		return "", nil
@ -515,8 +633,34 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons
 	if err != nil {
 		return "", err
 	}
+
+	leaseID := path.Join(req.Path, leaseUUID)
+
+	defer func() {
+		// If there is an error we want to rollback as much as possible (note
+		// that errors here are ignored to do as much cleanup as we can). We
+		// want to revoke a generated secret (since an error means we may not
+		// be successfully tracking it), remove indexes, and delete the entry.
+		if retErr != nil {
+			revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data))
+			if err != nil {
+				retErr = multierror.Append(retErr, errwrap.Wrapf("an additional internal error was encountered revoking the newly-generated secret: {{err}}", err))
+			} else if revResp != nil && revResp.IsError() {
+				retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error()))
+			}
+
+			if err := m.deleteEntry(leaseID); err != nil {
+				retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered deleting any lease associated with the newly-generated secret: {{err}}", err))
+			}
+
+			if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil {
+				retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err))
+			}
+		}
+	}()
+
 	le := leaseEntry{
-		LeaseID:     path.Join(req.Path, leaseUUID),
+		LeaseID:     leaseID,
 		ClientToken: req.ClientToken,
 		Path:        req.Path,
 		Data:        resp.Data,
@ -548,6 +692,10 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons
 func (m *ExpirationManager) RegisterAuth(source string, auth *logical.Auth) error {
 	defer metrics.MeasureSince([]string{"expire", "register-auth"}, time.Now())

+	if auth.ClientToken == "" {
+		return fmt.Errorf("expiration: cannot register an auth lease with an empty token")
+	}
+
 	// Create a lease entry
 	le := leaseEntry{
 		LeaseID:     path.Join(source, m.tokenStore.SaltID(auth.ClientToken)),
@ -668,7 +816,7 @@ func (m *ExpirationManager) revokeEntry(le *leaseEntry) error {
 	// Revocation of login tokens is special since we can by-pass the
 	// backend and directly interact with the token store
 	if le.Auth != nil {
-		if err := m.tokenStore.RevokeTree(le.Auth.ClientToken); err != nil {
+		if err := m.tokenStore.RevokeTree(le.ClientToken); err != nil {
 			return fmt.Errorf("failed to revoke token: %v", err)
 		}

--- a/vault/expiration_test.go
+++ b/vault/expiration_test.go
@ -33,6 +33,211 @@ func mockBackendExpiration(t testing.TB, backend physical.Backend) (*Core, *Expi
 	return c, ts.expiration
 }

+func TestExpiration_Tidy(t *testing.T) {
+	var err error
+
+	exp := mockExpiration(t)
+
+	// Set up a count function to calculate number of leases
+	count := 0
+	countFunc := func(leaseID string) {
+		count++
+	}
+
+	// Scan the storage with the count func set
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that there are no leases to begin with
+	if count != 0 {
+		t.Fatalf("bad: lease count; expected:0 actual:%d", count)
+	}
+
+	// Create a lease entry without a client token in it
+	le := &leaseEntry{
+		LeaseID: "lease/with/no/client/token",
+		Path:    "foo/bar",
+	}
+
+	// Persist the invalid lease entry
+	if err = exp.persistEntry(le); err != nil {
+		t.Fatalf("error persisting entry: %v", err)
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that the storage was successful and that the count of leases is
+	// now 1
+	if count != 1 {
+		t.Fatalf("bad: lease count; expected:1 actual:%d", count)
+	}
+
+	// Run the tidy operation
+	err = exp.Tidy()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	count = 0
+	if err := logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Post the tidy operation, the invalid lease entry should have been gone
+	if count != 0 {
+		t.Fatalf("bad: lease count; expected:0 actual:%d", count)
+	}
+
+	// Set a revoked/invalid token in the lease entry
+	le.ClientToken = "invalidtoken"
+
+	// Persist the invalid lease entry
+	if err = exp.persistEntry(le); err != nil {
+		t.Fatalf("error persisting entry: %v", err)
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that the storage was successful and that the count of leases is
+	// now 1
+	if count != 1 {
+		t.Fatalf("bad: lease count; expected:1 actual:%d", count)
+	}
+
+	// Run the tidy operation
+	err = exp.Tidy()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Post the tidy operation, the invalid lease entry should have been gone
+	if count != 0 {
+		t.Fatalf("bad: lease count; expected:0 actual:%d", count)
+	}
+
+	// Attach an invalid token with 2 leases
+	if err = exp.persistEntry(le); err != nil {
+		t.Fatalf("error persisting entry: %v", err)
+	}
+
+	le.LeaseID = "another/invalid/lease"
+	if err = exp.persistEntry(le); err != nil {
+		t.Fatalf("error persisting entry: %v", err)
+	}
+
+	// Run the tidy operation
+	err = exp.Tidy()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Post the tidy operation, the invalid lease entry should have been gone
+	if count != 0 {
+		t.Fatalf("bad: lease count; expected:0 actual:%d", count)
+	}
+
+	for i := 0; i < 1000; i++ {
+		req := &logical.Request{
+			Operation:   logical.ReadOperation,
+			Path:        "invalid/lease/" + fmt.Sprintf("%d", i+1),
+			ClientToken: "invalidtoken",
+		}
+		resp := &logical.Response{
+			Secret: &logical.Secret{
+				LeaseOptions: logical.LeaseOptions{
+					TTL: 100 * time.Millisecond,
+				},
+			},
+			Data: map[string]interface{}{
+				"test_key": "test_value",
+			},
+		}
+		_, err := exp.Register(req, resp)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that there are 1000 leases now
+	if count != 1000 {
+		t.Fatalf("bad: lease count; expected:1000 actual:%d", count)
+	}
+
+	errCh1 := make(chan error)
+	errCh2 := make(chan error)
+
+	// Initiate tidy of the above 1000 invalid leases in quick succession. Only
+	// one tidy operation can be in flight at any time. One of these requests
+	// should error out.
+	go func() {
+		errCh1 <- exp.Tidy()
+	}()
+
+	go func() {
+		errCh2 <- exp.Tidy()
+	}()
+
+	var err1, err2 error
+
+	for i := 0; i < 2; i++ {
+		select {
+		case err1 = <-errCh1:
+		case err2 = <-errCh2:
+		}
+	}
+
+	if !(err1 != nil && err1.Error() == "tidy operation on leases is already in progress") &&
+		!(err2 != nil && err2.Error() == "tidy operation on leases is already in progress") {
+		t.Fatal("expected at least one of err1 or err2 to be set; err1: %#v\n err2:%#v\n", err1, err2)
+	}
+
+	root, err := exp.tokenStore.rootToken()
+	le.ClientToken = root.ID
+
+	// Attach a valid token with the leases
+	if err = exp.persistEntry(le); err != nil {
+		t.Fatalf("error persisting entry: %v", err)
+	}
+
+	// Run the tidy operation
+	err = exp.Tidy()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	count = 0
+	if err = logical.ScanView(exp.idView, countFunc); err != nil {
+		t.Fatal(err)
+	}
+
+	// Post the tidy operation, the valid lease entry should not get affected
+	if count != 1 {
+		t.Fatalf("bad: lease count; expected:1 actual:%d", count)
+	}
+}
+
 func BenchmarkExpiration_Restore_Etcd(b *testing.B) {
 	addr := os.Getenv("PHYSICAL_BACKEND_BENCHMARK_ADDR")
 	randPath := fmt.Sprintf("vault-%d/", time.Now().Unix())
@ -145,8 +350,9 @@ func TestExpiration_Restore(t *testing.T) {
 	}
 	for _, path := range paths {
 		req := &logical.Request{
-			Operation: logical.ReadOperation,
-			Path:      path,
+			Operation:   logical.ReadOperation,
+			Path:        path,
+			ClientToken: "foobar",
 		}
 		resp := &logical.Response{
 			Secret: &logical.Secret{
@ -200,8 +406,9 @@ func TestExpiration_Restore(t *testing.T) {
 func TestExpiration_Register(t *testing.T) {
 	exp := mockExpiration(t)
 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
@ -299,8 +506,9 @@ func TestExpiration_Revoke(t *testing.T) {
 	exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view)

 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
@ -341,8 +549,9 @@ func TestExpiration_RevokeOnExpire(t *testing.T) {
 	exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view)

 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
@ -400,8 +609,9 @@ func TestExpiration_RevokePrefix(t *testing.T) {
 	}
 	for _, path := range paths {
 		req := &logical.Request{
-			Operation: logical.ReadOperation,
-			Path:      path,
+			Operation:   logical.ReadOperation,
+			Path:        path,
+			ClientToken: "foobar",
 		}
 		resp := &logical.Response{
 			Secret: &logical.Secret{
@ -588,8 +798,9 @@ func TestExpiration_Renew(t *testing.T) {
 	exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view)

 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
@ -654,8 +865,9 @@ func TestExpiration_Renew_NotRenewable(t *testing.T) {
 	exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view)

 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
@ -700,8 +912,9 @@ func TestExpiration_Renew_RevokeOnExpire(t *testing.T) {
 	exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view)

 	req := &logical.Request{
-		Operation: logical.ReadOperation,
-		Path:      "prod/aws/foo",
+		Operation:   logical.ReadOperation,
+		Path:        "prod/aws/foo",
+		ClientToken: "foobar",
 	}
 	resp := &logical.Response{
 		Secret: &logical.Secret{
--- a/vault/logical_system.go
+++ b/vault/logical_system.go
@ -423,6 +423,17 @@ func NewSystemBackend(core *Core, config *logical.BackendConfig) (logical.Backen
 				HelpDescription: strings.TrimSpace(sysHelp["revoke-prefix"][1]),
 			},

+			&framework.Path{
+				Pattern: "leases/tidy$",
+
+				Callbacks: map[logical.Operation]framework.OperationFunc{
+					logical.UpdateOperation: b.handleTidyLeases,
+				},
+
+				HelpSynopsis:    strings.TrimSpace(sysHelp["tidy_leases"][0]),
+				HelpDescription: strings.TrimSpace(sysHelp["tidy_leases"][1]),
+			},
+
 			&framework.Path{
 				Pattern: "auth$",

@ -798,6 +809,15 @@ type SystemBackend struct {
 	Backend *framework.Backend
 }

+func (b *SystemBackend) handleTidyLeases(req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
+	err := b.Core.expiration.Tidy()
+	if err != nil {
+		b.Backend.Logger().Error("sys: failed to tidy leases", "error", err)
+		return handleError(err)
+	}
+	return nil, err
+}
+
 func (b *SystemBackend) invalidate(key string) {
 	if b.Core.logger.IsTrace() {
 		b.Core.logger.Trace("sys: invaliding key", "key", key)
@ -973,7 +993,7 @@ func (b *SystemBackend) handleCapabilitiesAccessor(req *logical.Request, d *fram
 		return logical.ErrorResponse("missing accessor"), nil
 	}

-	aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor)
+	aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor, false)
 	if err != nil {
 		return nil, err
 	}
@ -2593,6 +2613,15 @@ Enable a new audit backend or disable an existing backend.
 		on a given path.`,
 	},

+	"tidy_leases": {
+		`This endpoint performs cleanup tasks that can be run if certain error
+conditions have occurred.`,
+		`This endpoint performs cleanup tasks that can be run to clean up the
+lease entries after certain error conditions. Usually running this is not
+necessary, and is only required if upgrade notes or support personnel suggest
+it.`,
+	},
+
 	"wrap": {
 		"Response-wraps an arbitrary JSON object.",
 		`Round trips the given input data into a response-wrapped token.`,
--- a/vault/request_handling.go
+++ b/vault/request_handling.go
@ -289,10 +289,11 @@ func (c *Core) handleRequest(req *logical.Request) (retResp *logical.Response, r
 		if err != nil {
 			c.logger.Error("core: failed to look up token", "error", err)
 			retErr = multierror.Append(retErr, ErrInternalError)
-			return nil, nil, retErr
+			return nil, auth, retErr
 		}

 		if err := c.expiration.RegisterAuth(te.Path, resp.Auth); err != nil {
+			c.tokenStore.Revoke(te.ID)
 			c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err)
 			retErr = multierror.Append(retErr, ErrInternalError)
 			return nil, auth, retErr
@ -440,6 +441,7 @@ func (c *Core) handleLoginRequest(req *logical.Request) (*logical.Response, *log

 		// Register with the expiration manager
 		if err := c.expiration.RegisterAuth(te.Path, auth); err != nil {
+			c.tokenStore.Revoke(te.ID)
 			c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err)
 			return nil, auth, ErrInternalError
 		}
--- a/vault/token_store.go
+++ b/vault/token_store.go
@ -3,10 +3,14 @@ package vault
 import (
 	"encoding/json"
 	"fmt"
+	"sync/atomic"
+
 	"regexp"
 	"strings"
 	"time"

+	log "github.com/mgutz/logxi/v1"
+
 	"github.com/armon/go-metrics"
 	"github.com/hashicorp/go-multierror"
 	"github.com/hashicorp/go-uuid"
@ -90,6 +94,10 @@ type TokenStore struct {
 	tokenLocks []*locksutil.LockEntry

 	cubbyholeDestroyer func(*TokenStore, string) error
+
+	logger log.Logger
+
+	tidyLock int64
 }

 // NewTokenStore is used to construct a token store that is
@ -102,14 +110,14 @@ func NewTokenStore(c *Core, config *logical.BackendConfig) (*TokenStore, error)
 	t := &TokenStore{
 		view:               view,
 		cubbyholeDestroyer: destroyCubbyhole,
+		logger:             c.logger,
+		tokenLocks:         locksutil.CreateLocks(),
 	}

 	if c.policyStore != nil {
 		t.policyLookupFunc = c.policyStore.GetPolicy
 	}

-	t.tokenLocks = locksutil.CreateLocks()
-
 	// Setup the framework endpoints
 	t.Backend = &framework.Backend{
 		AuthRenew: t.authRenew,
@ -610,7 +618,7 @@ func (ts *TokenStore) tokenStoreAccessorList(

 	ret := make([]string, 0, len(entries))
 	for _, entry := range entries {
-		aEntry, err := ts.lookupBySaltedAccessor(entry)
+		aEntry, err := ts.lookupBySaltedAccessor(entry, false)
 		if err != nil {
 			resp.AddWarning("Found an accessor entry that could not be successfully decoded")
 			continue
@ -1064,11 +1072,11 @@ func (ts *TokenStore) handleCreateAgainstRole(
 	return ts.handleCreateCommon(req, d, false, roleEntry)
 }

-func (ts *TokenStore) lookupByAccessor(accessor string) (accessorEntry, error) {
-	return ts.lookupBySaltedAccessor(ts.SaltID(accessor))
+func (ts *TokenStore) lookupByAccessor(accessor string, tainted bool) (accessorEntry, error) {
+	return ts.lookupBySaltedAccessor(ts.SaltID(accessor), tainted)
 }

-func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEntry, error) {
+func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string, tainted bool) (accessorEntry, error) {
 	entry, err := ts.view.Get(accessorPrefix + saltedAccessor)
 	var aEntry accessorEntry

@ -1082,8 +1090,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt
 	err = jsonutil.DecodeJSON(entry.Value, &aEntry)
 	// If we hit an error, assume it's a pre-struct straight token ID
 	if err != nil {
-		aEntry.TokenID = string(entry.Value)
-		te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID), false)
+		te, err := ts.lookupSalted(ts.SaltID(string(entry.Value)), tainted)
 		if err != nil {
 			return accessorEntry{}, fmt.Errorf("failed to look up token using accessor index: %s", err)
 		}
@ -1093,6 +1100,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt
 		// on lookup is nil, not an error, so we keep that behavior here to be
 		// safe...the token ID is simply not filled in.
 		if te != nil {
+			aEntry.TokenID = te.ID
 			aEntry.AccessorID = te.Accessor
 		}
 	}
@ -1103,49 +1111,80 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt
 // handleTidy handles the cleaning up of leaked accessor storage entries and
 // cleaning up of leases that are associated to tokens that are expired.
 func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
+	var tidyErrors *multierror.Error
+
+	if !atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) {
+		ts.logger.Warn("token: tidy operation on tokens is already in progress")
+		return nil, fmt.Errorf("tidy operation on tokens is already in progress")
+	}
+
+	defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0)
+
+	ts.logger.Debug("token: beginning tidy operation on tokens")
+	defer ts.logger.Debug("token: finished tidy operation on tokens")
+
 	// List out all the accessors
 	saltedAccessorList, err := ts.view.List(accessorPrefix)
 	if err != nil {
-		return nil, fmt.Errorf("failed to fetch accessor entries: %v", err)
+		return nil, fmt.Errorf("failed to fetch accessor index entries: %v", err)
 	}

-	var tidyErrors *multierror.Error
-
 	// First, clean up secondary index entries that are no longer valid
 	parentList, err := ts.view.List(parentPrefix)
 	if err != nil {
 		return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err)
 	}

+	var countParentList, deletedCountParentList int64
+
 	// Scan through the secondary index entries; if there is an entry
 	// with the token's salt ID at the end, remove it
 	for _, parent := range parentList {
-		children, err := ts.view.List(parentPrefix + parent + "/")
+		children, err := ts.view.List(parentPrefix + parent)
 		if err != nil {
-			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err))
+			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index: %v", err))
 			continue
 		}

 		for _, child := range children {
+			countParentList++
+			if countParentList%500 == 0 {
+				ts.logger.Info("token: checking validity of tokens in secondary index list", "progress", countParentList)
+			}
+
 			// Look up tainted entries so we can be sure that if this isn't
-			// found, it doesn't exist
+			// found, it doesn't exist. Doing the following without locking
+			// since appropriate locks cannot be held with salted token IDs.
 			te, _ := ts.lookupSalted(child, true)
 			if te == nil {
-				err = ts.view.Delete(parentPrefix + parent + "/" + child)
+				index := parentPrefix + parent + child
+				ts.logger.Trace("token: deleting invalid secondary index", "index", index)
+				err = ts.view.Delete(index)
 				if err != nil {
-					tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err))
+					tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index: %v", err))
 				}
+				deletedCountParentList++
 			}
 		}
 	}

+	var countAccessorList,
+		deletedCountAccessorEmptyToken,
+		deletedCountAccessorInvalidToken,
+		deletedCountInvalidTokenInAccessor int64
+
 	// For each of the accessor, see if the token ID associated with it is
 	// a valid one. If not, delete the leases associated with that token
 	// and delete the accessor as well.
 	for _, saltedAccessor := range saltedAccessorList {
-		accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor)
+		countAccessorList++
+		if countAccessorList%500 == 0 {
+			ts.logger.Info("token: checking if accessors contain valid tokens", "progress", countAccessorList)
+		}
+
+		accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor, true)
 		if err != nil {
-			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err))
+			tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor index: %v", err))
 			continue
 		}

@ -1153,25 +1192,32 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData
 		// in it. If not, it is an invalid accessor entry and needs to
 		// be deleted.
 		if accessorEntry.TokenID == "" {
+			index := accessorPrefix + saltedAccessor
 			// If deletion of accessor fails, move on to the next
 			// item since this is just a best-effort operation
-			err = ts.view.Delete(accessorPrefix + saltedAccessor)
+			err = ts.view.Delete(index)
 			if err != nil {
-				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err))
+				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor index: %v", err))
 				continue
 			}
+			deletedCountAccessorEmptyToken++
 		}

-		saltedId := ts.SaltID(accessorEntry.TokenID)
+		lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID)
+		lock.RLock()

 		// Look up tainted variants so we only find entries that truly don't
 		// exist
+		saltedId := ts.SaltID(accessorEntry.TokenID)
 		te, err := ts.lookupSalted(saltedId, true)
+		lock.RUnlock()

 		// If token entry is not found assume that the token is not valid any
 		// more and conclude that accessor, leases, and secondary index entries
 		// for this token should not exist as well.
 		if te == nil {
+			ts.logger.Info("token: deleting token with nil entry", "salted_token", saltedId)
+
 			// RevokeByToken expects a '*TokenEntry'. For the
 			// purposes of tidying, it is sufficient if the token
 			// entry only has ID set.
@ -1186,26 +1232,31 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData
 				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err))
 				continue
 			}
+			deletedCountInvalidTokenInAccessor++
+
+			index := accessorPrefix + saltedAccessor

 			// If deletion of accessor fails, move on to the next item since
 			// this is just a best-effort operation. We do this last so that on
 			// next run if something above failed we still have the accessor
 			// entry to try again.
-			err = ts.view.Delete(accessorPrefix + saltedAccessor)
+			err = ts.view.Delete(index)
 			if err != nil {
 				tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err))
 				continue
 			}
+			deletedCountAccessorInvalidToken++
 		}
 	}

-	// Later request handling code seems to check if the type is multierror so
-	// if we haven't added any errors we need to just return a normal nil error
-	if tidyErrors == nil {
-		return nil, nil
-	}
+	ts.logger.Debug("token: number of tokens scanned in parent index list", "count", countParentList)
+	ts.logger.Debug("token: number of tokens revoked in parent index list", "count", deletedCountParentList)
+	ts.logger.Debug("token: number of accessors scanned", "count", countAccessorList)
+	ts.logger.Debug("token: number of deleted accessors which had empty tokens", "count", deletedCountAccessorEmptyToken)
+	ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "count", deletedCountInvalidTokenInAccessor)
+	ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "count", deletedCountAccessorInvalidToken)

-	return nil, tidyErrors
+	return nil, tidyErrors.ErrorOrNil()
 }

 // handleUpdateLookupAccessor handles the auth/token/lookup-accessor path for returning
@ -1221,7 +1272,7 @@ func (ts *TokenStore) handleUpdateLookupAccessor(req *logical.Request, data *fra
 		urlaccessor = true
 	}

-	aEntry, err := ts.lookupByAccessor(accessor)
+	aEntry, err := ts.lookupByAccessor(accessor, false)
 	if err != nil {
 		return nil, err
 	}
@ -1275,7 +1326,7 @@ func (ts *TokenStore) handleUpdateRevokeAccessor(req *logical.Request, data *fra
 		urlaccessor = true
 	}

-	aEntry, err := ts.lookupByAccessor(accessor)
+	aEntry, err := ts.lookupByAccessor(accessor, true)
 	if err != nil {
 		return nil, err
 	}
--- a/vault/token_store_test.go
+++ b/vault/token_store_test.go
@ -240,7 +240,7 @@ func TestTokenStore_AccessorIndex(t *testing.T) {
 		t.Fatalf("bad: %#v", out)
 	}

-	aEntry, err := ts.lookupByAccessor(out.Accessor)
+	aEntry, err := ts.lookupByAccessor(out.Accessor, false)
 	if err != nil {
 		t.Fatalf("err: %s", err)
 	}
@ -318,7 +318,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) {

 	// Test upgrade from old struct method of accessor storage (of token id)
 	for _, accessor := range keys {
-		aEntry, err := ts.lookupByAccessor(accessor)
+		aEntry, err := ts.lookupByAccessor(accessor, false)
 		if err != nil {
 			t.Fatal(err)
 		}
@ -352,7 +352,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) {
 	}

 	for _, accessor := range keys2 {
-		aEntry, err := ts.lookupByAccessor(accessor)
+		aEntry, err := ts.lookupByAccessor(accessor, false)
 		if err != nil {
 			t.Fatal(err)
 		}