diff --git a/vault/expiration.go b/vault/expiration.go index d925c556a..c0012e570 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -214,9 +214,11 @@ func (m *ExpirationManager) revokeCommon(leaseID string, force, skipToken bool) return err } - // Delete the secondary index - if err := m.removeIndexByToken(le.ClientToken, le.LeaseID); err != nil { - return err + // Delete the secondary index, but only if it's a leased secret (not auth) + if le.Secret != nil { + if err := m.removeIndexByToken(le.ClientToken, le.LeaseID); err != nil { + return err + } } // Clear the expiration handler diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 32be9bb10..64d120f4b 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -705,6 +705,14 @@ func TestExpiration_revokeEntry_token(t *testing.T) { t.Fatalf("err: %v", err) } + // N.B.: Vault doesn't normally allow a response to carry both a secret and + // auth. Both are set here because auth is needed to exercise the token + // store (the only mounted backend), but RegisterAuth doesn't create the + // index by token; only Register (for a Secret) does. Without the Secret, + // revocation would skip removing the index, and this test would now fail + // because the bug that made every token expiration issue an extra delete + // against a non-existent key has been fixed. The test relies on this + // nonstandard setup. le := &leaseEntry{ LeaseID: "foo/bar/1234", Auth: &logical.Auth{ @@ -713,6 +721,11 @@ func TestExpiration_revokeEntry_token(t *testing.T) { TTL: time.Minute, }, }, + Secret: &logical.Secret{ + LeaseOptions: logical.LeaseOptions{ + TTL: time.Minute, + }, + }, ClientToken: root.ID, Path: "foo/bar", IssueTime: time.Now(), @@ -725,7 +738,7 @@ func TestExpiration_revokeEntry_token(t *testing.T) { if err := exp.createIndexByToken(le.ClientToken, le.LeaseID); err != nil { t.Fatalf("error creating secondary index: %v", err) } - exp.updatePending(le, le.Auth.LeaseTotal()) + exp.updatePending(le, le.Secret.LeaseTotal()) indexEntry, err := exp.indexByToken(le.ClientToken, le.LeaseID) if err != nil { diff --git a/vault/token_store.go b/vault/token_store.go index 56a2a3372..f78b0472e 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -40,6 +40,20 @@ const ( // rolesPrefix is the prefix used to store role information rolesPrefix = "roles/" + + // tokenRevocationDeferred indicates that the token should not be used + // again but is currently fulfilling its final use + tokenRevocationDeferred = -1 + + // tokenRevocationInProgress indicates that revocation of that token/its + // leases is ongoing + tokenRevocationInProgress = -2 + + // tokenRevocationFailed indicates that revocation failed; the entry is + // kept around so that when the cleanup function is run it can be tried + // again (or when the revocation function is run again), but all other uses + // will report the token invalid + tokenRevocationFailed = -3 ) var ( @@ -48,6 +62,14 @@ var ( // pathSuffixSanitize is used to ensure a path suffix in a role is valid. pathSuffixSanitize = regexp.MustCompile("\\w[\\w-.]+\\w") + + destroyCubbyhole = func(ts *TokenStore, saltedID string) error { + if ts.cubbyholeBackend == nil { + // Should only ever happen in testing + return nil + } + return ts.cubbyholeBackend.revoke(salt.SaltID(ts.cubbyholeBackend.saltUUID, saltedID, salt.SHA1Hash)) + } ) // TokenStore is used to manage client tokens.
Tokens are used for @@ -66,6 +88,8 @@ type TokenStore struct { policyLookupFunc func(string) (*Policy, error) tokenLocks map[string]*sync.RWMutex + + cubbyholeDestroyer func(*TokenStore, string) error } // NewTokenStore is used to construct a token store that is @@ -76,7 +100,8 @@ func NewTokenStore(c *Core, config *logical.BackendConfig) (*TokenStore, error) // Initialize the store t := &TokenStore{ - view: view, + view: view, + cubbyholeDestroyer: destroyCubbyhole, } if c.policyStore != nil { @@ -460,7 +485,13 @@ type TokenEntry struct { // Used for operators to be able to associate with the source DisplayName string `json:"display_name" mapstructure:"display_name" structs:"display_name"` - // Used to restrict the number of uses (zero is unlimited). This is to support one-time-tokens (generalized). + // Used to restrict the number of uses (zero is unlimited). This is to + // support one-time-tokens (generalized). There are a few special values: + // if it's -1 it has run through its use counts and is executing its final + // use; if it's -2 it is tainted, which means revocation is currently + // running on it; and if it's -3 it's also tainted but revocation + // previously ran and failed, so this hints the cleanup function to try it + // again. NumUses int `json:"num_uses" mapstructure:"num_uses" structs:"num_uses"` // Time of token creation @@ -722,7 +753,7 @@ func (ts *TokenStore) UseToken(te *TokenEntry) (*TokenEntry, error) { defer lock.Unlock() // Call lookupSalted instead of Lookup to avoid deadlocking since Lookup grabs a read lock - te, err := ts.lookupSalted(ts.SaltID(te.ID)) + te, err := ts.lookupSalted(ts.SaltID(te.ID), false) if err != nil { return nil, fmt.Errorf("failed to refresh entry: %v", err) } @@ -746,18 +777,9 @@ func (ts *TokenStore) UseToken(te *TokenEntry) (*TokenEntry, error) { te.NumUses -= 1 } - // Marshal the entry - enc, err := json.Marshal(te) + err = ts.storeCommon(te, false) if err != nil { - return nil, fmt.Errorf("failed to encode entry: %v", err) - } - - // Write under the primary ID - saltedId := ts.SaltID(te.ID) - path := lookupPrefix + saltedId - le := &logical.StorageEntry{Key: path, Value: enc} - if err := ts.view.Put(le); err != nil { - return nil, fmt.Errorf("failed to persist entry: %v", err) + return nil, err } return te, nil @@ -783,11 +805,13 @@ func (ts *TokenStore) Lookup(id string) (*TokenEntry, error) { lock.RLock() defer lock.RUnlock() - return ts.lookupSalted(ts.SaltID(id)) + return ts.lookupSalted(ts.SaltID(id), false) } -// lookupSlated is used to find a token given its salted ID -func (ts *TokenStore) lookupSalted(saltedId string) (*TokenEntry, error) { +// lookupSalted is used to find a token given its salted ID. 
If tainted is +// true, entries that are in some revocation state (currently, indicated by a +// negative use count) will be returned anyway +func (ts *TokenStore) lookupSalted(saltedId string, tainted bool) (*TokenEntry, error) { // Lookup token path := lookupPrefix + saltedId raw, err := ts.view.Get(path) @@ -806,8 +830,8 @@ func (ts *TokenStore) lookupSalted(saltedId string) (*TokenEntry, error) { return nil, fmt.Errorf("failed to decode entry: %v", err) } - // This is a token that is awaiting deferred revocation - if entry.NumUses == -1 { + // This is a token that is awaiting deferred revocation or tainted + if entry.NumUses < 0 && !tainted { return nil, nil } @@ -869,46 +893,109 @@ func (ts *TokenStore) Revoke(id string) error { // revokeSalted is used to invalidate a given salted token, // any child tokens will be orphaned. -func (ts *TokenStore) revokeSalted(saltedId string) error { +func (ts *TokenStore) revokeSalted(saltedId string) (ret error) { + // Protect the entry lookup/writing with locks. The rub here is that we + // don't know the ID until we look it up once, so first we look it up, then + // do a locked lookup. + entry, err := ts.lookupSalted(saltedId, true) + if err != nil { + return err + } + if entry == nil { + return nil + } + + lock := ts.getTokenLock(entry.ID) + lock.Lock() + // Lookup the token first - entry, err := ts.lookupSalted(saltedId) + entry, err = ts.lookupSalted(saltedId, true) + if err != nil { + lock.Unlock() + return err + } + + if entry == nil { + lock.Unlock() + return nil + } + + // On failure we write -3, so if we hit -2 here we're already running a + // revocation operation. This can happen due to e.g. recursion into this + // function via the expiration manager's RevokeByToken. + if entry.NumUses == tokenRevocationInProgress { + lock.Unlock() + return nil + } + + // This acts as a WAL. lookupSalted will no longer return this entry, + // so the token cannot be used, but this way we can keep the entry + // around until after the rest of this function is attempted, and a + // cleanup function can key off of this value to try again. + entry.NumUses = tokenRevocationInProgress + err = ts.storeCommon(entry, false) + lock.Unlock() if err != nil { return err } - // Nuke the primary key first - path := lookupPrefix + saltedId - if ts.view.Delete(path); err != nil { - return fmt.Errorf("failed to delete entry: %v", err) + // If we are returning an error, mark the entry with -3 to indicate + // failed revocation. This way we don't try to clean up during active + // revocation (-2). + defer func() { + if ret != nil { + lock.Lock() + defer lock.Unlock() + + // Lookup the token again to make sure something else didn't + // revoke in the interim + entry, err := ts.lookupSalted(saltedId, true) + if err != nil { + return + } + + // If it exists just taint to -3 rather than trying to figure + // out what it means if it's already -3 after the -2 above + if entry != nil { + entry.NumUses = tokenRevocationFailed + ts.storeCommon(entry, false) + } + } + }() + + // Destroy the token's cubby. This should go first as it's a + // security-sensitive item. + err = ts.cubbyholeDestroyer(ts, saltedId) + if err != nil { + return err + } + + // Revoke all secrets under this token. This should come next as it's also a + // security-sensitive item.
+ if err := ts.expiration.RevokeByToken(entry); err != nil { + return err } // Clear the secondary index if any - if entry != nil && entry.Parent != "" { + if entry.Parent != "" { path := parentPrefix + ts.SaltID(entry.Parent) + "/" + saltedId - if ts.view.Delete(path); err != nil { + if err = ts.view.Delete(path); err != nil { return fmt.Errorf("failed to delete entry: %v", err) } } // Clear the accessor index if any - if entry != nil && entry.Accessor != "" { + if entry.Accessor != "" { path := accessorPrefix + ts.SaltID(entry.Accessor) - if ts.view.Delete(path); err != nil { + if err = ts.view.Delete(path); err != nil { return fmt.Errorf("failed to delete entry: %v", err) } } - // Revoke all secrets under this token - if entry != nil { - if err := ts.expiration.RevokeByToken(entry); err != nil { - return err - } - } - - // Destroy the cubby space - err = ts.destroyCubbyhole(saltedId) - if err != nil { - return err + // Now that the entry is not usable for any revocation tasks, nuke it + path := lookupPrefix + saltedId + if err = ts.view.Delete(path); err != nil { + return fmt.Errorf("failed to delete entry: %v", err) } return nil @@ -993,7 +1080,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt // If we hit an error, assume it's a pre-struct straight token ID if err != nil { aEntry.TokenID = string(entry.Value) - te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID)) + te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID), false) if err != nil { return accessorEntry{}, fmt.Errorf("failed to look up token using accessor index: %s", err) } @@ -1637,8 +1724,13 @@ func (ts *TokenStore) handleLookup( return logical.ErrorResponse("missing token ID"), logical.ErrInvalidRequest } + lock := ts.getTokenLock(id) + lock.RLock() + defer lock.RUnlock() + // Lookup the token - out, err := ts.Lookup(id) + saltedId := ts.SaltID(id) + out, err := ts.lookupSalted(saltedId, true) if err != nil { return logical.ErrorResponse(err.Error()), logical.ErrInvalidRequest diff --git a/vault/token_store_test.go b/vault/token_store_test.go index 8840a6bd0..84228cdb9 100644 --- a/vault/token_store_test.go +++ b/vault/token_store_test.go @@ -5,6 +5,7 @@ import ( "fmt" "reflect" "strings" + "sync" "testing" "time" @@ -547,7 +548,7 @@ func TestTokenStore_UseToken(t *testing.T) { if te == nil { t.Fatalf("token entry for use #2 was nil") } - if te.NumUses != -1 { + if te.NumUses != tokenRevocationDeferred { t.Fatalf("token entry after use #2 did not have revoke flag") } ts.Revoke(te.ID) @@ -2987,3 +2988,163 @@ func TestTokenStore_AllowedDisallowedPolicies(t *testing.T) { t.Fatalf("expected an error") } } + +// Issue 2189 +func TestTokenStore_RevokeUseCountToken(t *testing.T) { + var resp *logical.Response + var err error + cubbyFuncLock := &sync.RWMutex{} + cubbyFuncLock.Lock() + + exp := mockExpiration(t) + ts := exp.tokenStore + root, _ := exp.tokenStore.rootToken() + + tokenReq := &logical.Request{ + Path: "create", + ClientToken: root.ID, + Operation: logical.UpdateOperation, + Data: map[string]interface{}{ + "num_uses": 1, + }, + } + resp, err = ts.HandleRequest(tokenReq) + if err != nil || (resp != nil && resp.IsError()) { + t.Fatalf("err:%v resp:%v", err, resp) + } + + tut := resp.Auth.ClientToken + saltTut := ts.SaltID(tut) + te, err := ts.lookupSalted(saltTut, false) + if err != nil { + t.Fatal(err) + } + if te == nil { + t.Fatal("nil entry") + } + if te.NumUses != 1 { + t.Fatalf("bad: %d", te.NumUses) + } + + te, err = ts.UseToken(te) + if err != nil { + t.Fatal(err) + } + if te 
== nil { + t.Fatal("nil entry") + } + if te.NumUses != tokenRevocationDeferred { + t.Fatalf("bad: %d", te.NumUses) + } + + // Should return no entry because it's tainted + te, err = ts.lookupSalted(saltTut, false) + if err != nil { + t.Fatal(err) + } + if te != nil { + t.Fatalf("%#v", te) + } + + // But it should show up in an API lookup call + req := &logical.Request{ + Path: "lookup-self", + ClientToken: tut, + Operation: logical.UpdateOperation, + } + resp, err = ts.HandleRequest(req) + if err != nil { + t.Fatal(err) + } + if resp == nil || resp.Data == nil || resp.Data["num_uses"] == nil { + t.Fatal("nil resp or data") + } + if resp.Data["num_uses"].(int) != -1 { + t.Fatalf("bad: %v", resp.Data["num_uses"]) + } + + // Should return tainted entries + te, err = ts.lookupSalted(saltTut, true) + if err != nil { + t.Fatal(err) + } + if te == nil { + t.Fatal("nil entry") + } + if te.NumUses != tokenRevocationDeferred { + t.Fatalf("bad: %d", te.NumUses) + } + + origDestroyCubbyhole := ts.cubbyholeDestroyer + + ts.cubbyholeDestroyer = func(*TokenStore, string) error { + return fmt.Errorf("keep it frosty") + } + + err = ts.revokeSalted(saltTut) + if err == nil { + t.Fatalf("expected err") + } + + // Since revocation failed we should see the tokenRevocationFailed canary value + te, err = ts.lookupSalted(saltTut, true) + if err != nil { + t.Fatal(err) + } + if te == nil { + t.Fatal("nil entry") + } + if te.NumUses != tokenRevocationFailed { + t.Fatalf("bad: %d", te.NumUses) + } + + // Check the race condition situation by making the process sleep + ts.cubbyholeDestroyer = func(*TokenStore, string) error { + time.Sleep(1 * time.Second) + return fmt.Errorf("keep it frosty") + } + cubbyFuncLock.Unlock() + + go func() { + cubbyFuncLock.RLock() + err := ts.revokeSalted(saltTut) + cubbyFuncLock.RUnlock() + if err == nil { + t.Fatalf("expected error") + } + }() + + // Give time for the function to start and grab locks + time.Sleep(200 * time.Millisecond) + te, err = ts.lookupSalted(saltTut, true) + if err != nil { + t.Fatal(err) + } + if te == nil { + t.Fatal("nil entry") + } + if te.NumUses != tokenRevocationInProgress { + t.Fatalf("bad: %d", te.NumUses) + } + + // Let things catch up + time.Sleep(2 * time.Second) + + // Put back to normal + cubbyFuncLock.Lock() + defer cubbyFuncLock.Unlock() + ts.cubbyholeDestroyer = origDestroyCubbyhole + + err = ts.revokeSalted(saltTut) + if err != nil { + t.Fatal(err) + } + + te, err = ts.lookupSalted(saltTut, true) + if err != nil { + t.Fatal(err) + } + if te != nil { + t.Fatal("found entry") + } +} diff --git a/version/version_base.go b/version/version_base.go index 0ef28f0da..a6d9ff9d4 100644 --- a/version/version_base.go +++ b/version/version_base.go @@ -9,5 +9,5 @@ func init() { // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. 
- VersionPrerelease = "dev" + VersionPrerelease = "" } diff --git a/website/config.rb b/website/config.rb index 4cf595220..936257277 100644 --- a/website/config.rb +++ b/website/config.rb @@ -2,7 +2,7 @@ set :base_url, "https://www.vaultproject.io/" activate :hashicorp do |h| h.name = "vault" - h.version = "0.6.3" + h.version = "0.6.4" h.github_slug = "hashicorp/vault" h.website_root = "website" end diff --git a/website/source/docs/install/upgrade-to-0.6.3.html.md b/website/source/docs/install/upgrade-to-0.6.3.html.md index 78ec1676f..d46df3439 100644 --- a/website/source/docs/install/upgrade-to-0.6.3.html.md +++ b/website/source/docs/install/upgrade-to-0.6.3.html.md @@ -3,7 +3,7 @@ layout: "install" page_title: "Upgrading to Vault 0.6.3" sidebar_current: "docs-install-upgrade-to-0.6.3" description: |- - Learn how to upgrade to Vault 0.63. + Learn how to upgrade to Vault 0.6.3. --- # Overview diff --git a/website/source/docs/install/upgrade-to-0.6.4.html.md b/website/source/docs/install/upgrade-to-0.6.4.html.md new file mode 100644 index 000000000..0a6f5a570 --- /dev/null +++ b/website/source/docs/install/upgrade-to-0.6.4.html.md @@ -0,0 +1,76 @@ +--- +layout: "install" +page_title: "Upgrading to Vault 0.6.4" +sidebar_current: "docs-install-upgrade-to-0.6.4" +description: |- + Learn how to upgrade to Vault 0.6.4. +--- + +# Overview + +This page contains the list of deprecations and important or breaking changes +for Vault 0.6.4. Please read it carefully. + +## Changes to `default` Policy Addition Rules for `auth/token/create` + +An issue was reported indicating that in some cases, the stated behavior of the +`default` policy being added to tokens unless specifically requested could lead +to a privilege escalation scenario if the parent token did not itself have the +`default` policy. In most cases (e.g. using the default `default` policy) the +escalated privileges are unlikely to be dangerous, but this behavior is +undesired. + +As such, we have modified the rules around `default` in a few ways when +creating tokens using this endpoint: + +1. If a token creation request does not specify desired policies, the parent's + policies will be used (as normal) but `default` will not be automatically + added, so the child token will only have `default` if the parent does +2. If a token creation request specifies desired policies, `default` will be + added automatically but only if the parent token has `default` +3. If the token has `sudo` capability on the token creation endpoint, `default` + is always added; since `sudo` capability on this endpoint allows adding any + policy, this is not privilege escalation +4. When using token store roles, `default` is always added to the list of child + token policies unless `default` is contained in `disallowed_policies` + +**Regardless of provenance**, if a token being created has the `default` policy +and `no_default_policy` has been set true for the request, the `default` policy +will be stripped from the final set of policies. + +## Many Users Should Run `auth/token/cleanup` + +While investigating a report of accessor entries not being removed correctly +from Vault's data store, we discovered a security issue: if limited use-count +tokens are being used and expire due to the use-count dropping to zero (rather +than due to the token's TTL expiring), several pieces of Vault's token +revocation logic would not be properly run.
This included cleaning up the +associated accessor entry from the data store and, more importantly, +the logic used to immediately expire leases associated with the token. + +These leases would still be expired when their TTL ran out rather than live +forever, which limits the severity of this issue, but it is still a potentially +serious bug depending on each particular setup. + +To mitigate this issue we have taken the following steps: + +1. Updated the token revocation logic to be more resilient to failures that can + happen as it performs its various steps; the revocation will now be + considered successful only if no error has occurred, and the token entry + will remain in the data store (but not be usable for performing Vault + functions) until all aspects of revocation report success + +2. Added an `auth/token/cleanup` endpoint that can both clean up the leaked + accessor entries and expire the leases associated with those accessors' + tokens. + +In most cases, running `auth/token/cleanup` should expire all outstanding +leases that were generated by these revoked limited-use-count tokens. + +If you are both using limited-use-count tokens *and* using them to +issue leased secrets, you should consider upgrading to 0.6.4 and running the +`auth/token/cleanup` endpoint immediately. + +Note: response wrapping tokens do not allow generating leases but have still +been subject to leaking the accessor entries, so if you have been a heavy user +of response wrapping you should still consider running the cleanup endpoint. diff --git a/website/source/layouts/install.erb b/website/source/layouts/install.erb index 1e69ff73d..b169bfb2f 100644 --- a/website/source/layouts/install.erb +++ b/website/source/layouts/install.erb @@ -37,6 +37,9 @@ > Upgrade to 0.6.3 + > + Upgrade to 0.6.4 +
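
As a rough illustration of the `default` policy rules described in the 0.6.4 upgrade notes above, the following sketch (not part of the patch) uses the Go API client from `github.com/hashicorp/vault/api`. It assumes a reachable Vault with `VAULT_ADDR` and `VAULT_TOKEN` set, and the `app-readonly` policy name is a placeholder.

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	// NewClient picks up VAULT_ADDR and VAULT_TOKEN from the environment.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Rule 2 above: when policies are requested explicitly, "default" is only
	// added automatically if the parent token itself has "default".
	secret, err := client.Auth().Token().Create(&api.TokenCreateRequest{
		Policies: []string{"app-readonly"}, // hypothetical policy name
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("child policies:", secret.Auth.Policies)

	// Regardless of how "default" would otherwise be added, no_default_policy
	// strips it from the final policy set.
	secret, err = client.Auth().Token().Create(&api.TokenCreateRequest{
		Policies:        []string{"app-readonly"},
		NoDefaultPolicy: true,
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("child policies:", secret.Auth.Policies)
}
```

Run against a parent token that lacks `default`, the first call should produce a child without `default` unless the sudo rule (rule 3) applies.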
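Similarly, a minimal sketch of the mitigation workflow from the upgrade note: it creates a limited use-count token of the kind affected by the bug and then calls the cleanup path. The `auth/token/cleanup` path is taken verbatim from the note above, and an operator-privileged token is assumed; this is illustrative only.

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	// Assumes VAULT_ADDR and VAULT_TOKEN point at Vault with an operator token.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// A limited use-count token: before 0.6.4, when such a token expired
	// because its use count hit zero (rather than its TTL), parts of the
	// revocation logic were skipped, leaking the accessor entry and leases.
	secret, err := client.Auth().Token().Create(&api.TokenCreateRequest{
		NumUses: 1,
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("one-use token accessor:", secret.Auth.Accessor)

	// After upgrading, hit the cleanup path named in the note above to remove
	// leaked accessor entries and expire their associated leases.
	if _, err := client.Logical().Write("auth/token/cleanup", nil); err != nil {
		log.Fatal(err)
	}
}
```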