Fix revocation of leases when num_uses goes to 0 (#2190)

Jeff Mitchell 2016-12-16 13:11:55 -05:00 committed by GitHub
parent 99eff7b2b4
commit f6044764c0
9 changed files with 397 additions and 50 deletions

View File

@@ -214,9 +214,11 @@ func (m *ExpirationManager) revokeCommon(leaseID string, force, skipToken bool)
return err
}
// Delete the secondary index
if err := m.removeIndexByToken(le.ClientToken, le.LeaseID); err != nil {
return err
// Delete the secondary index, but only if it's a leased secret (not auth)
if le.Secret != nil {
if err := m.removeIndexByToken(le.ClientToken, le.LeaseID); err != nil {
return err
}
}
// Clear the expiration handler

View File

@@ -705,6 +705,14 @@ func TestExpiration_revokeEntry_token(t *testing.T) {
t.Fatalf("err: %v", err)
}
// N.B.: Vault doesn't allow both a secret and auth to be returned, but the
// reason for both is that auth needs to be included in order to use the
// token store as it's the only mounted backend, *but* RegisterAuth doesn't
// actually create the index by token, only Register (for a Secret) does.
// So without the Secret we don't do anything when removing the index which
// (at the time of writing) now fails because a bug causing every token
// expiration to do an extra delete to a non-existent key has been fixed,
// and this test relies on this nonstandard behavior.
le := &leaseEntry{
LeaseID: "foo/bar/1234",
Auth: &logical.Auth{
@@ -713,6 +721,11 @@ func TestExpiration_revokeEntry_token(t *testing.T) {
TTL: time.Minute,
},
},
Secret: &logical.Secret{
LeaseOptions: logical.LeaseOptions{
TTL: time.Minute,
},
},
ClientToken: root.ID,
Path: "foo/bar",
IssueTime: time.Now(),
@@ -725,7 +738,7 @@ func TestExpiration_revokeEntry_token(t *testing.T) {
if err := exp.createIndexByToken(le.ClientToken, le.LeaseID); err != nil {
t.Fatalf("error creating secondary index: %v", err)
}
exp.updatePending(le, le.Auth.LeaseTotal())
exp.updatePending(le, le.Secret.LeaseTotal())
indexEntry, err := exp.indexByToken(le.ClientToken, le.LeaseID)
if err != nil {

View File

@@ -40,6 +40,20 @@ const (
// rolesPrefix is the prefix used to store role information
rolesPrefix = "roles/"
// tokenRevocationDeferred indicates that the token should not be used
// again but is currently fulfilling its final use
tokenRevocationDeferred = -1
// tokenRevocationInProgress indicates that revocation of that token/its
// leases is ongoing
tokenRevocationInProgress = -2
// tokenRevocationFailed indicates that revocation failed; the entry is
// kept around so that when the cleanup function is run it can be tried
// again (or when the revocation function is run again), but all other uses
// will report the token invalid
tokenRevocationFailed = -3
)
var (
@@ -48,6 +62,14 @@ var (
// pathSuffixSanitize is used to ensure a path suffix in a role is valid.
pathSuffixSanitize = regexp.MustCompile("\\w[\\w-.]+\\w")
destroyCubbyhole = func(ts *TokenStore, saltedID string) error {
if ts.cubbyholeBackend == nil {
// Should only ever happen in testing
return nil
}
return ts.cubbyholeBackend.revoke(salt.SaltID(ts.cubbyholeBackend.saltUUID, saltedID, salt.SHA1Hash))
}
)
// TokenStore is used to manage client tokens. Tokens are used for
@@ -66,6 +88,8 @@ type TokenStore struct {
policyLookupFunc func(string) (*Policy, error)
tokenLocks map[string]*sync.RWMutex
cubbyholeDestroyer func(*TokenStore, string) error
}
// NewTokenStore is used to construct a token store that is
@@ -76,7 +100,8 @@ func NewTokenStore(c *Core, config *logical.BackendConfig) (*TokenStore, error)
// Initialize the store
t := &TokenStore{
view: view,
view: view,
cubbyholeDestroyer: destroyCubbyhole,
}
if c.policyStore != nil {
@@ -460,7 +485,13 @@ type TokenEntry struct {
// Used for operators to be able to associate with the source
DisplayName string `json:"display_name" mapstructure:"display_name" structs:"display_name"`
// Used to restrict the number of uses (zero is unlimited). This is to support one-time-tokens (generalized).
// Used to restrict the number of uses (zero is unlimited). This is to
// support one-time-tokens (generalized). There are a few special values:
// if it's -1 it has run through its use counts and is executing its final
// use; if it's -2 it is tainted, which means revocation is currently
// running on it; and if it's -3 it's also tainted but revocation
// previously ran and failed, so this hints the cleanup function to try it
// again.
NumUses int `json:"num_uses" mapstructure:"num_uses" structs:"num_uses"`
// Time of token creation
@@ -722,7 +753,7 @@ func (ts *TokenStore) UseToken(te *TokenEntry) (*TokenEntry, error) {
defer lock.Unlock()
// Call lookupSalted instead of Lookup to avoid deadlocking since Lookup grabs a read lock
te, err := ts.lookupSalted(ts.SaltID(te.ID))
te, err := ts.lookupSalted(ts.SaltID(te.ID), false)
if err != nil {
return nil, fmt.Errorf("failed to refresh entry: %v", err)
}
@@ -746,18 +777,9 @@ func (ts *TokenStore) UseToken(te *TokenEntry) (*TokenEntry, error) {
te.NumUses -= 1
}
// Marshal the entry
enc, err := json.Marshal(te)
err = ts.storeCommon(te, false)
if err != nil {
return nil, fmt.Errorf("failed to encode entry: %v", err)
}
// Write under the primary ID
saltedId := ts.SaltID(te.ID)
path := lookupPrefix + saltedId
le := &logical.StorageEntry{Key: path, Value: enc}
if err := ts.view.Put(le); err != nil {
return nil, fmt.Errorf("failed to persist entry: %v", err)
return nil, err
}
return te, nil
@@ -783,11 +805,13 @@ func (ts *TokenStore) Lookup(id string) (*TokenEntry, error) {
lock.RLock()
defer lock.RUnlock()
return ts.lookupSalted(ts.SaltID(id))
return ts.lookupSalted(ts.SaltID(id), false)
}
// lookupSlated is used to find a token given its salted ID
func (ts *TokenStore) lookupSalted(saltedId string) (*TokenEntry, error) {
// lookupSalted is used to find a token given its salted ID. If tainted is
// true, entries that are in some revocation state (currently, indicated by
// num uses < 0) will still be returned.
func (ts *TokenStore) lookupSalted(saltedId string, tainted bool) (*TokenEntry, error) {
// Lookup token
path := lookupPrefix + saltedId
raw, err := ts.view.Get(path)
@@ -806,8 +830,8 @@ func (ts *TokenStore) lookupSalted(saltedId string) (*TokenEntry, error) {
return nil, fmt.Errorf("failed to decode entry: %v", err)
}
// This is a token that is awaiting deferred revocation
if entry.NumUses == -1 {
// This is a token that is awaiting deferred revocation or tainted
if entry.NumUses < 0 && !tainted {
return nil, nil
}
@@ -869,46 +893,109 @@ func (ts *TokenStore) Revoke(id string) error {
// revokeSalted is used to invalidate a given salted token,
// any child tokens will be orphaned.
func (ts *TokenStore) revokeSalted(saltedId string) error {
func (ts *TokenStore) revokeSalted(saltedId string) (ret error) {
// Protect the entry lookup/writing with locks. The rub here is that we
// don't know the ID until we look it up once, so first we look it up, then
// do a locked lookup.
entry, err := ts.lookupSalted(saltedId, true)
if err != nil {
return err
}
if entry == nil {
return nil
}
lock := ts.getTokenLock(entry.ID)
lock.Lock()
// Lookup the token first
entry, err := ts.lookupSalted(saltedId)
entry, err = ts.lookupSalted(saltedId, true)
if err != nil {
lock.Unlock()
return err
}
if entry == nil {
lock.Unlock()
return nil
}
// On failure we write -3, so if we hit -2 here we're already running a
// revocation operation. This can happen due to e.g. recursion into this
// function via the expiration manager's RevokeByToken.
if entry.NumUses == tokenRevocationInProgress {
lock.Unlock()
return nil
}
// This acts as a WAL. lookupSalted will no longer return this entry,
// so the token cannot be used, but this way we can keep the entry
// around until after the rest of this function is attempted, and a
// cleanup function can key off of this value to try again.
entry.NumUses = tokenRevocationInProgress
err = ts.storeCommon(entry, false)
lock.Unlock()
if err != nil {
return err
}
// Nuke the primary key first
path := lookupPrefix + saltedId
if ts.view.Delete(path); err != nil {
return fmt.Errorf("failed to delete entry: %v", err)
// If we are returning an error, mark the entry with -3 to indicate
// failed revocation. This way we don't try to clean up during active
// revocation (-2).
defer func() {
if ret != nil {
lock.Lock()
defer lock.Unlock()
// Lookup the token again to make sure something else didn't
// revoke in the interim
entry, err := ts.lookupSalted(saltedId, true)
if err != nil {
return
}
// If it exists just taint to -3 rather than trying to figure
// out what it means if it's already -3 after the -2 above
if entry != nil {
entry.NumUses = tokenRevocationFailed
ts.storeCommon(entry, false)
}
}
}()
// Destroy the token's cubby. This should go first as it's a
// security-sensitive item.
err = ts.cubbyholeDestroyer(ts, saltedId)
if err != nil {
return err
}
// Revoke all secrets under this token. This should go first as it's a
// security-sensitive item.
if err := ts.expiration.RevokeByToken(entry); err != nil {
return err
}
// Clear the secondary index if any
if entry != nil && entry.Parent != "" {
if entry.Parent != "" {
path := parentPrefix + ts.SaltID(entry.Parent) + "/" + saltedId
if ts.view.Delete(path); err != nil {
if err = ts.view.Delete(path); err != nil {
return fmt.Errorf("failed to delete entry: %v", err)
}
}
// Clear the accessor index if any
if entry != nil && entry.Accessor != "" {
if entry.Accessor != "" {
path := accessorPrefix + ts.SaltID(entry.Accessor)
if ts.view.Delete(path); err != nil {
if err = ts.view.Delete(path); err != nil {
return fmt.Errorf("failed to delete entry: %v", err)
}
}
// Revoke all secrets under this token
if entry != nil {
if err := ts.expiration.RevokeByToken(entry); err != nil {
return err
}
}
// Destroy the cubby space
err = ts.destroyCubbyhole(saltedId)
if err != nil {
return err
// Now that the entry is not usable for any revocation tasks, nuke it
path := lookupPrefix + saltedId
if err = ts.view.Delete(path); err != nil {
return fmt.Errorf("failed to delete entry: %v", err)
}
return nil
@@ -993,7 +1080,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt
// If we hit an error, assume it's a pre-struct straight token ID
if err != nil {
aEntry.TokenID = string(entry.Value)
te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID))
te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID), false)
if err != nil {
return accessorEntry{}, fmt.Errorf("failed to look up token using accessor index: %s", err)
}
@@ -1637,8 +1724,13 @@ func (ts *TokenStore) handleLookup(
return logical.ErrorResponse("missing token ID"), logical.ErrInvalidRequest
}
lock := ts.getTokenLock(id)
lock.RLock()
defer lock.RUnlock()
// Lookup the token
out, err := ts.Lookup(id)
saltedId := ts.SaltID(id)
out, err := ts.lookupSalted(saltedId, true)
if err != nil {
return logical.ErrorResponse(err.Error()), logical.ErrInvalidRequest

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"reflect"
"strings"
"sync"
"testing"
"time"
@@ -547,7 +548,7 @@ func TestTokenStore_UseToken(t *testing.T) {
if te == nil {
t.Fatalf("token entry for use #2 was nil")
}
if te.NumUses != -1 {
if te.NumUses != tokenRevocationDeferred {
t.Fatalf("token entry after use #2 did not have revoke flag")
}
ts.Revoke(te.ID)
@@ -2987,3 +2988,163 @@ func TestTokenStore_AllowedDisallowedPolicies(t *testing.T) {
t.Fatalf("expected an error")
}
}
// Issue 2189
func TestTokenStore_RevokeUseCountToken(t *testing.T) {
var resp *logical.Response
var err error
cubbyFuncLock := &sync.RWMutex{}
cubbyFuncLock.Lock()
exp := mockExpiration(t)
ts := exp.tokenStore
root, _ := exp.tokenStore.rootToken()
tokenReq := &logical.Request{
Path: "create",
ClientToken: root.ID,
Operation: logical.UpdateOperation,
Data: map[string]interface{}{
"num_uses": 1,
},
}
resp, err = ts.HandleRequest(tokenReq)
if err != nil || (resp != nil && resp.IsError()) {
t.Fatalf("err:%v resp:%v", err, resp)
}
tut := resp.Auth.ClientToken
saltTut := ts.SaltID(tut)
te, err := ts.lookupSalted(saltTut, false)
if err != nil {
t.Fatal(err)
}
if te == nil {
t.Fatal("nil entry")
}
if te.NumUses != 1 {
t.Fatalf("bad: %d", te.NumUses)
}
te, err = ts.UseToken(te)
if err != nil {
t.Fatal(err)
}
if te == nil {
t.Fatal("nil entry")
}
if te.NumUses != tokenRevocationDeferred {
t.Fatalf("bad: %d", te.NumUses)
}
// Should return no entry because it's tainted
te, err = ts.lookupSalted(saltTut, false)
if err != nil {
t.Fatal(err)
}
if te != nil {
t.Fatalf("%#v", te)
}
// But it should show up in an API lookup call
req := &logical.Request{
Path: "lookup-self",
ClientToken: tut,
Operation: logical.UpdateOperation,
}
resp, err = ts.HandleRequest(req)
if err != nil {
t.Fatal(err)
}
if resp == nil || resp.Data == nil || resp.Data["num_uses"] == nil {
t.Fatal("nil resp or data")
}
if resp.Data["num_uses"].(int) != -1 {
t.Fatalf("bad: %v", resp.Data["num_uses"])
}
// Should return tainted entries
te, err = ts.lookupSalted(saltTut, true)
if err != nil {
t.Fatal(err)
}
if te == nil {
t.Fatal("nil entry")
}
if te.NumUses != tokenRevocationDeferred {
t.Fatalf("bad: %d", te.NumUses)
}
origDestroyCubbyhole := ts.cubbyholeDestroyer
ts.cubbyholeDestroyer = func(*TokenStore, string) error {
return fmt.Errorf("keep it frosty")
}
err = ts.revokeSalted(saltTut)
if err == nil {
t.Fatalf("expected err")
}
// Since revocation failed we should see the tokenRevocationFailed canary value
te, err = ts.lookupSalted(saltTut, true)
if err != nil {
t.Fatal(err)
}
if te == nil {
t.Fatal("nil entry")
}
if te.NumUses != tokenRevocationFailed {
t.Fatalf("bad: %d", te.NumUses)
}
// Check the race condition situation by making the process sleep
ts.cubbyholeDestroyer = func(*TokenStore, string) error {
time.Sleep(1 * time.Second)
return fmt.Errorf("keep it frosty")
}
cubbyFuncLock.Unlock()
go func() {
cubbyFuncLock.RLock()
err := ts.revokeSalted(saltTut)
cubbyFuncLock.RUnlock()
if err == nil {
t.Fatalf("expected error")
}
}()
// Give time for the function to start and grab locks
time.Sleep(200 * time.Millisecond)
te, err = ts.lookupSalted(saltTut, true)
if err != nil {
t.Fatal(err)
}
if te == nil {
t.Fatal("nil entry")
}
if te.NumUses != tokenRevocationInProgress {
t.Fatalf("bad: %d", te.NumUses)
}
// Let things catch up
time.Sleep(2 * time.Second)
// Put back to normal
cubbyFuncLock.Lock()
defer cubbyFuncLock.Unlock()
ts.cubbyholeDestroyer = origDestroyCubbyhole
err = ts.revokeSalted(saltTut)
if err != nil {
t.Fatal(err)
}
te, err = ts.lookupSalted(saltTut, true)
if err != nil {
t.Fatal(err)
}
if te != nil {
t.Fatal("found entry")
}
}

View File

@@ -9,5 +9,5 @@ func init() {
// A pre-release marker for the version. If this is "" (empty string)
// then it means that it is a final release. Otherwise, this is a pre-release
// such as "dev" (in development), "beta", "rc1", etc.
VersionPrerelease = "dev"
VersionPrerelease = ""
}

View File

@@ -2,7 +2,7 @@ set :base_url, "https://www.vaultproject.io/"
activate :hashicorp do |h|
h.name = "vault"
h.version = "0.6.3"
h.version = "0.6.4"
h.github_slug = "hashicorp/vault"
h.website_root = "website"
end

View File

@@ -3,7 +3,7 @@ layout: "install"
page_title: "Upgrading to Vault 0.6.3"
sidebar_current: "docs-install-upgrade-to-0.6.3"
description: |-
Learn how to upgrade to Vault 0.63.
Learn how to upgrade to Vault 0.6.3.
---
# Overview

View File

@@ -0,0 +1,76 @@
---
layout: "install"
page_title: "Upgrading to Vault 0.6.4"
sidebar_current: "docs-install-upgrade-to-0.6.4"
description: |-
Learn how to upgrade to Vault 0.6.4.
---
# Overview
This page contains the list of deprecations and important or breaking changes
for Vault 0.6.4. Please read it carefully.
## Changes to `default` Policy Addition Rules for `auth/token/create`
An issue was reported indicating that in some cases, the stated behavior of the
`default` policy being added to tokens unless specifically requested could lead
to a privilege escalation scenario if the parent token did not itself have the
`default` policy. In most cases (e.g. when the built-in `default` policy is
used unmodified) the escalated privileges are unlikely to be dangerous, but
this behavior is undesired.
As such, we have modified the rules around `default` in a few ways when
creating tokens using this endpoint:
1. If a token creation request does not specify desired policies, the parent's
policies will be used (as normal) but `default` will not be automatically
added, so the child token will only have `default` if the parent does
2. If a token creation request specifies desired policies, `default` will be
added automatically but only if the parent token has `default`
3. If the token has `sudo` capability on the token creation endpoint, `default`
is always added; since `sudo` capability on this endpoint allows adding any
policy, this is not privilege escalation
4. When using token store roles, `default` is always added to the list of child
token policies unless `default` is contained in `disallowed_policies`
**Regardless of provenance**, if a token being created has the `default` policy
and `no_default_policy` has been set true for the request, the `default` policy
will be stripped from the final set of policies.
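These rules can be easier to follow in code. The sketch below is a hypothetical
illustration of the decision flow just described, not Vault's actual
implementation; every name in it (`childPolicies`, `parentPolicies`, `hasSudo`,
and so on) is invented for the example:

```go
package main

import "fmt"

// contains reports whether list includes s.
func contains(list []string, s string) bool {
	for _, v := range list {
		if v == s {
			return true
		}
	}
	return false
}

// childPolicies is an invented illustration of the four rules above plus the
// no_default_policy override; it is not Vault's implementation.
func childPolicies(parentPolicies, requested []string, hasSudo, usingRole bool,
	roleDisallowed []string, noDefaultPolicy bool) []string {

	var out []string
	if len(requested) == 0 {
		// Rule 1: inherit the parent's policies; "default" is not added on
		// top, so the child has it only if the parent does.
		out = append(out, parentPolicies...)
	} else {
		out = append(out, requested...)
		// Rule 2: add "default" automatically, but only if the parent has it.
		if contains(parentPolicies, "default") && !contains(out, "default") {
			out = append(out, "default")
		}
	}

	// Rule 3: sudo capability on the create endpoint always adds "default".
	if hasSudo && !contains(out, "default") {
		out = append(out, "default")
	}

	// Rule 4: token store roles always add "default" unless the role's
	// disallowed_policies contains it.
	if usingRole && !contains(roleDisallowed, "default") && !contains(out, "default") {
		out = append(out, "default")
	}

	// Regardless of provenance: no_default_policy strips "default".
	if noDefaultPolicy {
		kept := out[:0]
		for _, p := range out {
			if p != "default" {
				kept = append(kept, p)
			}
		}
		out = kept
	}
	return out
}

func main() {
	// Parent without "default" and no requested policies: the child no longer
	// receives "default" implicitly.
	fmt.Println(childPolicies([]string{"ops"}, nil, false, false, nil, false))
	// Parent with "default" and explicit requested policies: "default" is
	// added automatically.
	fmt.Println(childPolicies([]string{"default", "ops"}, []string{"app"}, false, false, nil, false))
}
```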
## Many Users Should Run `auth/token/cleanup`
While investigating a report of accessor entries not being removed correctly
from Vault's data store, a security issue was discovered: if limited use-count
tokens are being used and expire due to the use-count dropping to zero (rather
than due to the token's TTL expiring), several pieces of Vault's token
revocation logic would not be properly run. This included cleaning up the
associated accessor entry from the data store, but more importantly, included
the logic used to immediately expire leases associated with the token.
These leases would still be expired when their TTL ran out rather than live
forever, which limits the severity of this issue, but it is still a potentially
serious bug depending on each particular setup.
To mitigate this issue we have taken the following steps:
1. Updated the token revocation logic to be more resilient to failures that can
happen as it performs its various steps; the revocation will now properly
only be considered successful if no error has occurred, and the token entry
will remain in the data store (but not be usable for performing Vault
functions) until all aspects of revocation report success
2. Added an endpoint, `auth/token/cleanup`, that can both clean up the leaked
accessor entries and expire the leases associated with those accessors'
tokens.
In most cases, running `auth/token/cleanup` should expire all outstanding
leases that were generated by these revoked limited-use-count tokens.
If you are both using limited-use-count tokens *and* you are using them to
issue leased secrets, you should consider upgrading to 0.6.4 and running the
`auth/token/cleanup` endpoint immediately.
Note: response wrapping tokens do not allow generating leases but have still
been subject to leaking the accessor entries, so if you have been a heavy user
of response wrapping you should still consider running the cleanup function.
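For reference, a minimal sketch of invoking this endpoint with the Vault Go API
client (`github.com/hashicorp/vault/api`) might look like the following. The
endpoint path is taken from this guide; the address, the token, and the
assumption that an empty write triggers the cleanup are all placeholders:

```go
package main

import (
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	// Placeholder address and token; replace with values for your cluster.
	config := api.DefaultConfig()
	config.Address = "https://vault.example.com:8200"

	client, err := api.NewClient(config)
	if err != nil {
		log.Fatalf("failed to create Vault client: %v", err)
	}
	client.SetToken("replace-with-a-sufficiently-privileged-token")

	// The endpoint path comes from this guide; whether it accepts any
	// parameters is an assumption, so no data is sent here.
	if _, err := client.Logical().Write("auth/token/cleanup", nil); err != nil {
		log.Fatalf("cleanup request failed: %v", err)
	}
	log.Println("cleanup request submitted")
}
```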

View File

@@ -37,6 +37,9 @@
<li<%= sidebar_current("docs-install-upgrade-to-0.6.3") %>>
<a href="/docs/install/upgrade-to-0.6.3.html">Upgrade to 0.6.3</a>
</li>
<li<%= sidebar_current("docs-install-upgrade-to-0.6.4") %>>
<a href="/docs/install/upgrade-to-0.6.4.html">Upgrade to 0.6.4</a>
</li>
</ul>
</li>
</ul>