open-vault/vault/expiration.go

1028 lines
29 KiB
Go
Raw Normal View History

package vault
2015-03-13 17:55:54 +00:00
import (
"encoding/json"
"fmt"
"path"
"strings"
"sync"
"sync/atomic"
2015-03-13 17:55:54 +00:00
"time"
"github.com/armon/go-metrics"
2016-08-19 20:45:17 +00:00
log "github.com/mgutz/logxi/v1"
"github.com/hashicorp/errwrap"
2017-03-07 20:22:21 +00:00
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-uuid"
"github.com/hashicorp/vault/helper/consts"
"github.com/hashicorp/vault/helper/jsonutil"
2017-03-07 20:22:21 +00:00
"github.com/hashicorp/vault/helper/locksutil"
"github.com/hashicorp/vault/logical"
2015-03-13 17:55:54 +00:00
)
2015-03-13 01:38:15 +00:00
const (
// expirationSubPath is the sub-path used for the expiration manager
// view. This is nested under the system view.
expirationSubPath = "expire/"
// leaseViewPrefix is the prefix used for the ID based lookup of leases.
leaseViewPrefix = "id/"
// tokenViewPrefix is the prefix used for the token based lookup of leases.
tokenViewPrefix = "token/"
// maxRevokeAttempts limits how many revoke attempts are made
maxRevokeAttempts = 6
// revokeRetryBase is a baseline retry time
revokeRetryBase = 10 * time.Second
// minRevokeDelay is used to prevent an instant revoke on restore
minRevokeDelay = 5 * time.Second
// maxLeaseDuration is the default maximum lease duration
maxLeaseTTL = 32 * 24 * time.Hour
// defaultLeaseDuration is the default lease duration used when no lease is specified
defaultLeaseTTL = maxLeaseTTL
)
// ExpirationManager is used by the Core to manage leases. Secrets
// can provide a lease, meaning that they can be renewed or revoked.
// If a secret is not renewed in timely manner, it may be expired, and
// the ExpirationManager will handle doing automatic revocation.
type ExpirationManager struct {
router *Router
idView *BarrierView
tokenView *BarrierView
tokenStore *TokenStore
2016-08-19 20:45:17 +00:00
logger log.Logger
2015-03-16 01:06:19 +00:00
pending map[string]*time.Timer
pendingLock sync.Mutex
tidyLock int64
}
// NewExpirationManager creates a new ExpirationManager that is backed
2015-03-13 01:38:15 +00:00
// using a given view, and uses the provided router for revocation.
2016-08-19 20:45:17 +00:00
func NewExpirationManager(router *Router, view *BarrierView, ts *TokenStore, logger log.Logger) *ExpirationManager {
2015-03-16 01:06:19 +00:00
if logger == nil {
2016-08-19 20:45:17 +00:00
logger = log.New("expiration_manager")
2015-03-16 01:06:19 +00:00
}
exp := &ExpirationManager{
router: router,
idView: view.SubView(leaseViewPrefix),
tokenView: view.SubView(tokenViewPrefix),
tokenStore: ts,
logger: logger,
pending: make(map[string]*time.Timer),
}
return exp
}
// setupExpiration is invoked after we've loaded the mount table to
// initialize the expiration manager
func (c *Core) setupExpiration() error {
c.metricsMutex.Lock()
defer c.metricsMutex.Unlock()
// Create a sub-view
view := c.systemBarrierView.SubView(expirationSubPath)
// Create the manager
mgr := NewExpirationManager(c.router, view, c.tokenStore, c.logger)
c.expiration = mgr
// Link the token store to this
c.tokenStore.SetExpirationManager(mgr)
// Restore the existing state
2017-02-06 23:30:13 +00:00
c.logger.Info("expiration: restoring leases")
if err := c.expiration.Restore(); err != nil {
return fmt.Errorf("expiration state restore failed: %v", err)
}
return nil
}
// stopExpiration is used to stop the expiration manager before
// sealing the Vault.
func (c *Core) stopExpiration() error {
if c.expiration != nil {
if err := c.expiration.Stop(); err != nil {
return err
}
c.metricsMutex.Lock()
defer c.metricsMutex.Unlock()
c.expiration = nil
}
return nil
}
2017-05-04 16:11:00 +00:00
// Tidy cleans up the dangling storage entries for leases. It scans the storage
// view to find all the available leases, checks if the token embedded in it is
// either empty or invalid and in both the cases, it revokes them. It also uses
// a token cache to avoid multiple lookups of the same token ID. It is normally
// not required to use the API that invokes this. This is only intended to
// clean up the corrupt storage due to bugs.
2017-03-07 20:22:21 +00:00
func (m *ExpirationManager) Tidy() error {
var tidyErrors *multierror.Error
if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) {
m.logger.Debug("expiration: tidy operation on leases is already in progress")
return fmt.Errorf("tidy operation on leases is already in progress")
}
defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0)
m.logger.Debug("expiration: beginning tidy operation on leases")
2017-04-26 20:54:48 +00:00
// Create a cache to keep track of looked up tokens
tokenCache := make(map[string]bool)
countLease := 0
deletedCountEmptyToken := 0
deletedCountInvalidToken := 0
2017-04-26 20:54:48 +00:00
2017-03-07 20:22:21 +00:00
tidyFunc := func(leaseID string) {
countLease++
if countLease%500 == 0 {
m.logger.Debug("expiration: tidying leases", "progress", countLease)
}
2017-03-07 20:22:21 +00:00
le, err := m.loadEntry(leaseID)
if err != nil {
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err))
return
}
if le == nil {
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("nil entry for lease ID %q: %v", leaseID, err))
return
}
var isValid, ok bool
revokeLease := false
2017-03-07 20:22:21 +00:00
if le.ClientToken == "" {
m.logger.Debug("expiration: revoking lease which has an empty token", "lease_id", leaseID)
revokeLease = true
deletedCountEmptyToken++
goto REVOKE_CHECK
2017-03-07 20:22:21 +00:00
}
isValid, ok = tokenCache[le.ClientToken]
if !ok {
saltedID := m.tokenStore.SaltID(le.ClientToken)
lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken)
lock.RLock()
te, err := m.tokenStore.lookupSalted(saltedID, true)
lock.RUnlock()
2017-04-26 20:54:48 +00:00
if err != nil {
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err))
return
}
2017-04-27 15:08:11 +00:00
if te == nil {
m.logger.Debug("expiration: revoking lease which holds an invalid token", "lease_id", leaseID)
revokeLease = true
deletedCountInvalidToken++
tokenCache[le.ClientToken] = false
} else {
tokenCache[le.ClientToken] = true
}
2017-05-05 14:26:40 +00:00
goto REVOKE_CHECK
2017-04-27 15:08:11 +00:00
} else {
if isValid {
return
} else {
m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID)
revokeLease = true
2017-05-04 16:41:15 +00:00
deletedCountInvalidToken++
}
2017-05-05 14:26:40 +00:00
goto REVOKE_CHECK
}
REVOKE_CHECK:
if revokeLease {
2017-03-07 20:22:21 +00:00
// Force the revocation and skip going through the token store
// again
err = m.revokeCommon(leaseID, true, true)
if err != nil {
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err))
return
}
}
}
if err := logical.ScanView(m.idView, tidyFunc); err != nil {
return err
2017-03-07 20:22:21 +00:00
}
m.logger.Debug("expiration: ending tidy operation on leases")
m.logger.Debug("expiration: number of leases scanned", "total", countLease)
m.logger.Debug("expiration: number of revoked leases which had empty tokens", "deleted_count", deletedCountEmptyToken)
m.logger.Debug("expiration: number of revoked leases which had invalid tokens", "deleted_count", deletedCountInvalidToken)
return tidyErrors.ErrorOrNil()
2017-03-07 20:22:21 +00:00
}
// Restore is used to recover the lease states when starting.
// This is used after starting the vault.
func (m *ExpirationManager) Restore() error {
m.pendingLock.Lock()
defer m.pendingLock.Unlock()
// Accumulate existing leases
2017-02-06 23:30:13 +00:00
m.logger.Debug("expiration: collecting leases")
2017-01-06 20:42:18 +00:00
existing, err := logical.CollectKeys(m.idView)
2015-03-18 19:03:33 +00:00
if err != nil {
return fmt.Errorf("failed to scan for leases: %v", err)
}
2017-02-06 23:30:13 +00:00
m.logger.Debug("expiration: leases collected", "num_existing", len(existing))
// Make the channels used for the worker pool
broker := make(chan string)
quit := make(chan bool)
// Buffer these channels to prevent deadlocks
errs := make(chan error, len(existing))
result := make(chan *leaseEntry, len(existing))
// Use a wait group
wg := &sync.WaitGroup{}
// Create 64 workers to distribute work to
for i := 0; i < consts.ExpirationRestoreWorkerCount; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case leaseID, ok := <-broker:
// broker has been closed, we are done
if !ok {
return
}
le, err := m.loadEntry(leaseID)
if err != nil {
errs <- err
continue
}
// Write results out to the result channel
result <- le
// quit early
case <-quit:
return
}
}
}()
}
// Distribute the collected keys to the workers in a go routine
wg.Add(1)
go func() {
defer wg.Done()
for i, leaseID := range existing {
if i%500 == 0 {
m.logger.Trace("expiration: leases loading", "progress", i)
}
select {
case <-quit:
return
default:
broker <- leaseID
}
2017-02-06 23:30:13 +00:00
}
// Close the broker, causing worker routines to exit
close(broker)
}()
// Restore each key by pulling from the result chan
for i := 0; i < len(existing); i++ {
select {
case err := <-errs:
// Close all go routines
close(quit)
return err
case le := <-result:
// If there is no entry, nothing to restore
if le == nil {
continue
}
// If there is no expiry time, don't do anything
if le.ExpireTime.IsZero() {
continue
}
// Determine the remaining time to expiration
expires := le.ExpireTime.Sub(time.Now())
if expires <= 0 {
expires = minRevokeDelay
}
// Setup revocation timer
m.pending[le.LeaseID] = time.AfterFunc(expires, func() {
m.expireID(le.LeaseID)
})
}
}
// Let all go routines finish
wg.Wait()
if len(m.pending) > 0 {
2016-08-19 20:45:17 +00:00
if m.logger.IsInfo() {
m.logger.Info("expire: leases restored", "restored_lease_count", len(m.pending))
}
}
return nil
}
// Stop is used to prevent further automatic revocations.
// This must be called before sealing the view.
func (m *ExpirationManager) Stop() error {
2015-03-16 01:06:19 +00:00
// Stop all the pending expiration timers
m.pendingLock.Lock()
for _, timer := range m.pending {
timer.Stop()
}
2015-03-16 01:06:19 +00:00
m.pending = make(map[string]*time.Timer)
m.pendingLock.Unlock()
return nil
}
2015-03-13 01:38:15 +00:00
// Revoke is used to revoke a secret named by the given LeaseID
func (m *ExpirationManager) Revoke(leaseID string) error {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "revoke"}, time.Now())
return m.revokeCommon(leaseID, false, false)
}
// revokeCommon does the heavy lifting. If force is true, we ignore a problem
// during revocation and still remove entries/index/lease timers
func (m *ExpirationManager) revokeCommon(leaseID string, force, skipToken bool) error {
defer metrics.MeasureSince([]string{"expire", "revoke-common"}, time.Now())
// Load the entry
le, err := m.loadEntry(leaseID)
if err != nil {
return err
}
// If there is no entry, nothing to revoke
if le == nil {
return nil
}
// Revoke the entry
if !skipToken || le.Auth == nil {
if err := m.revokeEntry(le); err != nil {
if !force {
return err
} else {
2016-08-19 20:45:17 +00:00
if m.logger.IsWarn() {
m.logger.Warn("revocation from the backend failed, but in force mode so ignoring", "error", err)
}
}
}
}
// Delete the entry
if err := m.deleteEntry(leaseID); err != nil {
return err
}
// Delete the secondary index, but only if it's a leased secret (not auth)
if le.Secret != nil {
if err := m.removeIndexByToken(le.ClientToken, le.LeaseID); err != nil {
return err
}
2015-04-10 21:48:08 +00:00
}
// Clear the expiration handler
m.pendingLock.Lock()
if timer, ok := m.pending[leaseID]; ok {
timer.Stop()
delete(m.pending, leaseID)
}
m.pendingLock.Unlock()
2015-03-13 01:38:15 +00:00
return nil
}
// RevokeForce works similarly to RevokePrefix but continues in the case of a
// revocation error; this is mostly meant for recovery operations
func (m *ExpirationManager) RevokeForce(prefix string) error {
defer metrics.MeasureSince([]string{"expire", "revoke-force"}, time.Now())
return m.revokePrefixCommon(prefix, true)
}
2015-03-13 01:38:15 +00:00
// RevokePrefix is used to revoke all secrets with a given prefix.
// The prefix maps to that of the mount table to make this simpler
// to reason about.
func (m *ExpirationManager) RevokePrefix(prefix string) error {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "revoke-prefix"}, time.Now())
2015-03-16 21:59:37 +00:00
return m.revokePrefixCommon(prefix, false)
}
// RevokeByToken is used to revoke all the secrets issued with a given token.
// This is done by using the secondary index. It also removes the lease entry
// for the token itself. As a result it should *ONLY* ever be called from the
// token store's revokeSalted function.
func (m *ExpirationManager) RevokeByToken(te *TokenEntry) error {
defer metrics.MeasureSince([]string{"expire", "revoke-by-token"}, time.Now())
// Lookup the leases
existing, err := m.lookupByToken(te.ID)
2015-03-18 19:03:33 +00:00
if err != nil {
return fmt.Errorf("failed to scan for leases: %v", err)
}
// Revoke all the keys
for idx, leaseID := range existing {
if err := m.Revoke(leaseID); err != nil {
return fmt.Errorf("failed to revoke '%s' (%d / %d): %v",
leaseID, idx+1, len(existing), err)
}
}
if te.Path != "" {
tokenLeaseID := path.Join(te.Path, m.tokenStore.SaltID(te.ID))
// We want to skip the revokeEntry call as that will call back into
// revocation logic in the token store, which is what is running this
// function in the first place -- it'd be a deadlock loop. Since the only
// place that this function is called is revokeSalted in the token store,
// we're already revoking the token, so we just want to clean up the lease.
// This avoids spurious revocations later in the log when the timer runs
// out, and eases up resource usage.
return m.revokeCommon(tokenLeaseID, false, true)
}
return nil
2015-03-13 01:38:15 +00:00
}
func (m *ExpirationManager) revokePrefixCommon(prefix string, force bool) error {
// Ensure there is a trailing slash
if !strings.HasSuffix(prefix, "/") {
prefix = prefix + "/"
}
// Accumulate existing leases
sub := m.idView.SubView(prefix)
2017-01-06 20:42:18 +00:00
existing, err := logical.CollectKeys(sub)
2015-04-10 21:48:08 +00:00
if err != nil {
return fmt.Errorf("failed to scan for leases: %v", err)
}
// Revoke all the keys
for idx, suffix := range existing {
leaseID := prefix + suffix
if err := m.revokeCommon(leaseID, force, false); err != nil {
2015-04-10 21:48:08 +00:00
return fmt.Errorf("failed to revoke '%s' (%d / %d): %v",
leaseID, idx+1, len(existing), err)
}
}
return nil
}
// Renew is used to renew a secret using the given leaseID
2015-03-13 01:38:15 +00:00
// and a renew interval. The increment may be ignored.
func (m *ExpirationManager) Renew(leaseID string, increment time.Duration) (*logical.Response, error) {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "renew"}, time.Now())
// Load the entry
le, err := m.loadEntry(leaseID)
if err != nil {
return nil, err
}
2015-04-09 21:23:37 +00:00
// Check if the lease is renewable
if _, err := le.renewable(); err != nil {
2015-04-09 21:23:37 +00:00
return nil, err
2015-04-09 00:03:46 +00:00
}
2015-03-16 20:29:51 +00:00
// Attempt to renew the entry
2015-03-16 21:59:37 +00:00
resp, err := m.renewEntry(le, increment)
2015-03-16 20:29:51 +00:00
if err != nil {
return nil, err
}
2015-03-16 20:29:51 +00:00
// Fast-path if there is no lease
2015-04-09 19:29:13 +00:00
if resp == nil || resp.Secret == nil || !resp.Secret.LeaseEnabled() {
2015-03-16 20:29:51 +00:00
return resp, nil
}
2015-03-16 20:29:51 +00:00
// Validate the lease
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
if err := resp.Secret.Validate(); err != nil {
2015-03-16 20:29:51 +00:00
return nil, err
}
// Attach the LeaseID
resp.Secret.LeaseID = leaseID
2015-03-16 23:11:55 +00:00
// Update the lease entry
2015-03-16 20:29:51 +00:00
le.Data = resp.Data
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
le.Secret = resp.Secret
2015-04-09 19:29:13 +00:00
le.ExpireTime = resp.Secret.ExpirationTime()
le.LastRenewalTime = time.Now()
if err := m.persistEntry(le); err != nil {
return nil, err
}
// Update the expiration time
m.updatePending(le, resp.Secret.LeaseTotal())
2015-03-16 20:29:51 +00:00
// Return the response
return resp, nil
2015-03-13 01:38:15 +00:00
}
// RenewToken is used to renew a token which does not need to
// invoke a logical backend.
2016-02-18 16:22:04 +00:00
func (m *ExpirationManager) RenewToken(req *logical.Request, source string, token string,
increment time.Duration) (*logical.Response, error) {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "renew-token"}, time.Now())
// Compute the Lease ID
leaseID := path.Join(source, m.tokenStore.SaltID(token))
// Load the entry
le, err := m.loadEntry(leaseID)
if err != nil {
2015-04-06 23:35:39 +00:00
return nil, err
}
// Check if the lease is renewable. Note that this also checks for a nil
// lease and errors in that case as well.
if _, err := le.renewable(); err != nil {
return logical.ErrorResponse(err.Error()), logical.ErrInvalidRequest
}
2015-04-09 21:23:37 +00:00
// Attempt to renew the auth entry
2016-02-18 16:22:04 +00:00
resp, err := m.renewAuthEntry(req, le, increment)
2015-04-09 21:23:37 +00:00
if err != nil {
return nil, err
}
2015-04-09 21:23:37 +00:00
if resp == nil {
return nil, nil
}
if resp.IsError() {
2016-03-04 20:35:58 +00:00
return &logical.Response{
Data: resp.Data,
}, nil
}
2015-04-09 21:23:37 +00:00
if resp.Auth == nil || !resp.Auth.LeaseEnabled() {
2016-03-04 23:08:13 +00:00
return &logical.Response{
Auth: resp.Auth,
}, nil
2015-04-09 00:03:46 +00:00
}
2015-04-09 21:23:37 +00:00
// Attach the ClientToken
resp.Auth.ClientToken = token
resp.Auth.Increment = 0
2015-04-09 21:23:37 +00:00
// Update the lease entry
2015-04-09 21:23:37 +00:00
le.Auth = resp.Auth
le.ExpireTime = resp.Auth.ExpirationTime()
le.LastRenewalTime = time.Now()
if err := m.persistEntry(le); err != nil {
2015-04-06 23:35:39 +00:00
return nil, err
}
// Update the expiration time
2015-04-09 21:23:37 +00:00
m.updatePending(le, resp.Auth.LeaseTotal())
2016-03-04 20:35:58 +00:00
return &logical.Response{
Auth: resp.Auth,
}, nil
}
2015-03-13 01:38:15 +00:00
// Register is used to take a request and response with an associated
// lease. The secret gets assigned a LeaseID and the management of
2015-03-13 01:38:15 +00:00
// of lease is assumed by the expiration manager.
func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "register"}, time.Now())
if req.ClientToken == "" {
return "", fmt.Errorf("expiration: cannot register a lease with an empty client token")
}
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
// Ignore if there is no leased secret
if resp == nil || resp.Secret == nil {
2015-03-13 17:55:54 +00:00
return "", nil
}
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
// Validate the secret
if err := resp.Secret.Validate(); err != nil {
2015-03-13 17:55:54 +00:00
return "", err
}
// Create a lease entry
leaseUUID, err := uuid.GenerateUUID()
if err != nil {
return "", err
}
leaseID := path.Join(req.Path, leaseUUID)
defer func() {
2017-05-04 15:54:57 +00:00
// If there is an error we want to rollback as much as possible (note
// that errors here are ignored to do as much cleanup as we can). We
// want to revoke a generated secret (since an error means we may not
// be successfully tracking it), remove indexes, and delete the entry.
if retErr != nil {
revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data))
if err != nil {
2017-05-04 16:44:31 +00:00
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional internal error was encountered revoking the newly-generated secret: {{err}}", err))
} else if revResp != nil && revResp.IsError() {
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error()))
}
if err := m.deleteEntry(leaseID); err != nil {
2017-05-04 16:44:31 +00:00
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered deleting any lease associated with the newly-generated secret: {{err}}", err))
}
if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil {
2017-05-04 16:44:31 +00:00
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err))
}
}
}()
2015-03-13 17:55:54 +00:00
le := leaseEntry{
LeaseID: leaseID,
2015-04-10 21:48:08 +00:00
ClientToken: req.ClientToken,
Path: req.Path,
Data: resp.Data,
Secret: resp.Secret,
IssueTime: time.Now(),
2015-04-10 21:48:08 +00:00
ExpireTime: resp.Secret.ExpirationTime(),
2015-03-13 17:55:54 +00:00
}
2015-03-16 01:06:19 +00:00
// Encode the entry
if err := m.persistEntry(&le); err != nil {
return "", err
}
2015-04-10 21:48:08 +00:00
// Maintain secondary index by token
if err := m.createIndexByToken(le.ClientToken, le.LeaseID); err != nil {
2015-04-10 21:48:08 +00:00
return "", err
}
// Setup revocation timer if there is a lease
m.updatePending(&le, resp.Secret.LeaseTotal())
2015-03-16 01:06:19 +00:00
// Done
return le.LeaseID, nil
2015-03-16 01:06:19 +00:00
}
// RegisterAuth is used to take an Auth response with an associated lease.
// The token does not get a LeaseID, but the lease management is handled by
// the expiration manager.
func (m *ExpirationManager) RegisterAuth(source string, auth *logical.Auth) error {
2015-04-08 23:43:17 +00:00
defer metrics.MeasureSince([]string{"expire", "register-auth"}, time.Now())
if auth.ClientToken == "" {
return fmt.Errorf("expiration: cannot register an auth lease with an empty token")
}
// Create a lease entry
le := leaseEntry{
2015-04-10 21:48:08 +00:00
LeaseID: path.Join(source, m.tokenStore.SaltID(auth.ClientToken)),
ClientToken: auth.ClientToken,
Auth: auth,
Path: source,
IssueTime: time.Now(),
2015-04-10 21:48:08 +00:00
ExpireTime: auth.ExpirationTime(),
}
// Encode the entry
if err := m.persistEntry(&le); err != nil {
return err
}
// Setup revocation timer
m.updatePending(&le, auth.LeaseTotal())
return nil
}
// FetchLeaseTimesByToken is a helper function to use token values to compute
// the leaseID, rather than pushing that logic back into the token store.
func (m *ExpirationManager) FetchLeaseTimesByToken(source, token string) (*leaseEntry, error) {
defer metrics.MeasureSince([]string{"expire", "fetch-lease-times-by-token"}, time.Now())
// Compute the Lease ID
leaseID := path.Join(source, m.tokenStore.SaltID(token))
return m.FetchLeaseTimes(leaseID)
}
// FetchLeaseTimes is used to fetch the issue time, expiration time, and last
// renewed time of a lease entry. It returns a leaseEntry itself, but with only
// those values copied over.
func (m *ExpirationManager) FetchLeaseTimes(leaseID string) (*leaseEntry, error) {
defer metrics.MeasureSince([]string{"expire", "fetch-lease-times"}, time.Now())
// Load the entry
le, err := m.loadEntry(leaseID)
if err != nil {
return nil, err
}
if le == nil {
return nil, nil
}
ret := &leaseEntry{
IssueTime: le.IssueTime,
ExpireTime: le.ExpireTime,
LastRenewalTime: le.LastRenewalTime,
}
if le.Secret != nil {
ret.Secret = &logical.Secret{}
ret.Secret.Renewable = le.Secret.Renewable
ret.Secret.TTL = le.Secret.TTL
}
if le.Auth != nil {
ret.Auth = &logical.Auth{}
ret.Auth.Renewable = le.Auth.Renewable
ret.Auth.TTL = le.Auth.TTL
}
return ret, nil
}
// updatePending is used to update a pending invocation for a lease
func (m *ExpirationManager) updatePending(le *leaseEntry, leaseTotal time.Duration) {
m.pendingLock.Lock()
defer m.pendingLock.Unlock()
// Check for an existing timer
timer, ok := m.pending[le.LeaseID]
// Create entry if it does not exist
if !ok && leaseTotal > 0 {
timer := time.AfterFunc(leaseTotal, func() {
2015-04-08 22:43:26 +00:00
m.expireID(le.LeaseID)
})
m.pending[le.LeaseID] = timer
return
}
// Delete the timer if the expiration time is zero
if ok && leaseTotal == 0 {
timer.Stop()
delete(m.pending, le.LeaseID)
return
}
// Extend the timer by the lease total
if ok && leaseTotal > 0 {
timer.Reset(leaseTotal)
2015-04-08 22:43:26 +00:00
}
}
2015-03-16 01:06:19 +00:00
// expireID is invoked when a given ID is expired
func (m *ExpirationManager) expireID(leaseID string) {
2015-03-16 01:06:19 +00:00
// Clear from the pending expiration
m.pendingLock.Lock()
delete(m.pending, leaseID)
2015-03-16 01:06:19 +00:00
m.pendingLock.Unlock()
for attempt := uint(0); attempt < maxRevokeAttempts; attempt++ {
err := m.Revoke(leaseID)
if err == nil {
2016-08-19 20:45:17 +00:00
if m.logger.IsInfo() {
m.logger.Info("expire: revoked lease", "lease_id", leaseID)
}
return
}
2016-08-19 20:45:17 +00:00
m.logger.Error("expire: failed to revoke lease", "lease_id", leaseID, "error", err)
time.Sleep((1 << attempt) * revokeRetryBase)
2015-03-16 01:06:19 +00:00
}
2016-08-19 20:45:17 +00:00
m.logger.Error("expire: maximum revoke attempts reached", "lease_id", leaseID)
2015-03-16 01:06:19 +00:00
}
// revokeEntry is used to attempt revocation of an internal entry
func (m *ExpirationManager) revokeEntry(le *leaseEntry) error {
// Revocation of login tokens is special since we can by-pass the
// backend and directly interact with the token store
if le.Auth != nil {
if err := m.tokenStore.RevokeTree(le.ClientToken); err != nil {
return fmt.Errorf("failed to revoke token: %v", err)
}
return nil
}
// Handle standard revocation via backends
resp, err := m.router.Route(logical.RevokeRequest(
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
le.Path, le.Secret, le.Data))
if err != nil || (resp != nil && resp.IsError()) {
return fmt.Errorf("failed to revoke entry: resp:%#v err:%s", resp, err)
}
return nil
2015-03-16 01:06:19 +00:00
}
2015-03-16 20:29:51 +00:00
// renewEntry is used to attempt renew of an internal entry
2015-03-16 21:59:37 +00:00
func (m *ExpirationManager) renewEntry(le *leaseEntry, increment time.Duration) (*logical.Response, error) {
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
secret := *le.Secret
secret.IssueTime = le.IssueTime
secret.Increment = increment
secret.LeaseID = ""
vault: clean up VaultID duplications, make secret responses clearer /cc @armon - This is a reasonably major refactor that I think cleans up a lot of the logic with secrets in responses. The reason for the refactor is that while implementing Renew/Revoke in logical/framework I found the existing API to be really awkward to work with. Primarily, we needed a way to send down internal data for Vault core to store since not all the data you need to revoke a key is always sent down to the user (for example the user than AWS key belongs to). At first, I was doing this manually in logical/framework with req.Storage, but this is going to be such a common event that I think its something core should assist with. Additionally, I think the added context for secrets will be useful in the future when we have a Vault API for returning orphaned out keys: we can also return the internal data that might help an operator. So this leads me to this refactor. I've removed most of the fields in `logical.Response` and replaced it with a single `*Secret` pointer. If this is non-nil, then the response represents a secret. The Secret struct encapsulates all the lease info and such. It also has some fields on it that are only populated at _request_ time for Revoke/Renew operations. There is precedent for this sort of behavior in the Go stdlib where http.Request/http.Response have fields that differ based on client/server. I copied this style. All core unit tests pass. The APIs fail for obvious reasons but I'll fix that up in the next commit.
2015-03-19 22:11:42 +00:00
2015-04-09 21:23:37 +00:00
req := logical.RenewRequest(le.Path, &secret, le.Data)
resp, err := m.router.Route(req)
if err != nil || (resp != nil && resp.IsError()) {
return nil, fmt.Errorf("failed to renew entry: resp:%#v err:%s", resp, err)
2015-04-09 21:23:37 +00:00
}
return resp, nil
}
2016-03-09 16:07:13 +00:00
// renewAuthEntry is used to attempt renew of an auth entry. Only the token
// store should get the actual token ID intact.
2016-02-18 16:22:04 +00:00
func (m *ExpirationManager) renewAuthEntry(req *logical.Request, le *leaseEntry, increment time.Duration) (*logical.Response, error) {
2015-04-09 21:23:37 +00:00
auth := *le.Auth
auth.IssueTime = le.IssueTime
auth.Increment = increment
2016-03-09 16:07:13 +00:00
if strings.HasPrefix(le.Path, "auth/token/") {
auth.ClientToken = le.ClientToken
} else {
auth.ClientToken = ""
}
2015-04-09 21:23:37 +00:00
2016-02-18 16:22:04 +00:00
authReq := logical.RenewAuthRequest(le.Path, &auth, nil)
authReq.Connection = req.Connection
resp, err := m.router.Route(authReq)
2015-03-16 20:29:51 +00:00
if err != nil {
return nil, fmt.Errorf("failed to renew entry: %v", err)
}
return resp, nil
}
2015-03-16 01:06:19 +00:00
// loadEntry is used to read a lease entry
func (m *ExpirationManager) loadEntry(leaseID string) (*leaseEntry, error) {
out, err := m.idView.Get(leaseID)
2015-03-16 01:06:19 +00:00
if err != nil {
return nil, fmt.Errorf("failed to read lease entry: %v", err)
}
if out == nil {
return nil, nil
}
le, err := decodeLeaseEntry(out.Value)
if err != nil {
return nil, fmt.Errorf("failed to decode lease entry: %v", err)
}
return le, nil
}
// persistEntry is used to persist a lease entry
func (m *ExpirationManager) persistEntry(le *leaseEntry) error {
2015-03-13 17:55:54 +00:00
// Encode the entry
buf, err := le.encode()
if err != nil {
2015-03-16 01:06:19 +00:00
return fmt.Errorf("failed to encode lease entry: %v", err)
2015-03-13 17:55:54 +00:00
}
// Write out to the view
ent := logical.StorageEntry{
Key: le.LeaseID,
2015-03-13 17:55:54 +00:00
Value: buf,
}
if err := m.idView.Put(&ent); err != nil {
2015-03-16 01:06:19 +00:00
return fmt.Errorf("failed to persist lease entry: %v", err)
2015-03-13 17:55:54 +00:00
}
2015-03-16 01:06:19 +00:00
return nil
}
2015-03-13 17:55:54 +00:00
2015-03-16 01:06:19 +00:00
// deleteEntry is used to delete a lease entry
func (m *ExpirationManager) deleteEntry(leaseID string) error {
if err := m.idView.Delete(leaseID); err != nil {
2015-03-16 01:06:19 +00:00
return fmt.Errorf("failed to delete lease entry: %v", err)
}
return nil
2015-03-13 17:55:54 +00:00
}
// createIndexByToken creates a secondary index from the token to a lease entry
func (m *ExpirationManager) createIndexByToken(token, leaseID string) error {
2015-04-10 21:48:08 +00:00
ent := logical.StorageEntry{
Key: m.tokenStore.SaltID(token) + "/" + m.tokenStore.SaltID(leaseID),
Value: []byte(leaseID),
}
if err := m.tokenView.Put(&ent); err != nil {
return fmt.Errorf("failed to persist lease index entry: %v", err)
}
return nil
}
// indexByToken looks up the secondary index from the token to a lease entry
func (m *ExpirationManager) indexByToken(token, leaseID string) (*logical.StorageEntry, error) {
key := m.tokenStore.SaltID(token) + "/" + m.tokenStore.SaltID(leaseID)
entry, err := m.tokenView.Get(key)
if err != nil {
return nil, fmt.Errorf("failed to look up secondary index entry")
}
return entry, nil
}
2015-04-10 21:48:08 +00:00
// removeIndexByToken removes the secondary index from the token to a lease entry
func (m *ExpirationManager) removeIndexByToken(token, leaseID string) error {
key := m.tokenStore.SaltID(token) + "/" + m.tokenStore.SaltID(leaseID)
if err := m.tokenView.Delete(key); err != nil {
return fmt.Errorf("failed to delete lease index entry: %v", err)
}
return nil
}
// lookupByToken is used to lookup all the leaseID's via the
func (m *ExpirationManager) lookupByToken(token string) ([]string, error) {
// Scan via the index for sub-leases
prefix := m.tokenStore.SaltID(token) + "/"
subKeys, err := m.tokenView.List(prefix)
if err != nil {
return nil, fmt.Errorf("failed to list leases: %v", err)
}
// Read each index entry
leaseIDs := make([]string, 0, len(subKeys))
for _, sub := range subKeys {
out, err := m.tokenView.Get(prefix + sub)
if err != nil {
return nil, fmt.Errorf("failed to read lease index: %v", err)
}
if out == nil {
continue
}
leaseIDs = append(leaseIDs, string(out.Value))
}
return leaseIDs, nil
}
2015-04-08 23:43:17 +00:00
// emitMetrics is invoked periodically to emit statistics
func (m *ExpirationManager) emitMetrics() {
m.pendingLock.Lock()
num := len(m.pending)
m.pendingLock.Unlock()
metrics.SetGauge([]string{"expire", "num_leases"}, float32(num))
}
2015-03-13 17:55:54 +00:00
// leaseEntry is used to structure the values the expiration
// manager stores. This is used to handle renew and revocation.
type leaseEntry struct {
LeaseID string `json:"lease_id"`
ClientToken string `json:"client_token"`
Path string `json:"path"`
Data map[string]interface{} `json:"data"`
Secret *logical.Secret `json:"secret"`
Auth *logical.Auth `json:"auth"`
IssueTime time.Time `json:"issue_time"`
ExpireTime time.Time `json:"expire_time"`
LastRenewalTime time.Time `json:"last_renewal_time"`
2015-03-13 17:55:54 +00:00
}
// encode is used to JSON encode the lease entry
func (le *leaseEntry) encode() ([]byte, error) {
return json.Marshal(le)
2015-03-13 17:55:54 +00:00
}
func (le *leaseEntry) renewable() (bool, error) {
var err error
switch {
2015-04-09 21:23:37 +00:00
// If there is no entry, cannot review
case le == nil || le.ExpireTime.IsZero():
err = fmt.Errorf("lease not found or lease is not renewable")
2015-04-09 21:23:37 +00:00
// Determine if the lease is expired
case le.ExpireTime.Before(time.Now()):
err = fmt.Errorf("lease expired")
2015-04-09 21:23:37 +00:00
// Determine if the lease is renewable
case le.Secret != nil && !le.Secret.Renewable:
err = fmt.Errorf("lease is not renewable")
case le.Auth != nil && !le.Auth.Renewable:
err = fmt.Errorf("lease is not renewable")
2015-04-09 21:23:37 +00:00
}
if err != nil {
return false, err
2015-04-09 21:23:37 +00:00
}
return true, nil
}
func (le *leaseEntry) ttl() int64 {
return int64(le.ExpireTime.Sub(time.Now().Round(time.Second)).Seconds())
2015-04-09 21:23:37 +00:00
}
2015-03-13 17:55:54 +00:00
// decodeLeaseEntry is used to reverse encode and return a new entry
func decodeLeaseEntry(buf []byte) (*leaseEntry, error) {
out := new(leaseEntry)
return out, jsonutil.DecodeJSON(buf, out)
2015-03-13 01:38:15 +00:00
}