package vault

import (
	"context"
	"crypto/ecdsa"
	"crypto/x509"
	"errors"
	"fmt"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/errwrap"
	multierror "github.com/hashicorp/go-multierror"
	uuid "github.com/hashicorp/go-uuid"
	"github.com/hashicorp/vault/audit"
	"github.com/hashicorp/vault/helper/consts"
	"github.com/hashicorp/vault/helper/jsonutil"
	"github.com/hashicorp/vault/logical"
	"github.com/hashicorp/vault/physical"
	"github.com/oklog/run"
)

// Standby checks if the Vault is in standby mode. The flag is read under the
// state read lock; the returned error is always nil.
func (c *Core) Standby() (bool, error) {
	c.stateLock.RLock()
	standby := c.standby
	c.stateLock.RUnlock()
	return standby, nil
}

// Leader is used to get the current active leader.
//
// It returns ErrHANotEnabled when no HA backend is configured and
// consts.ErrSealed when the core is sealed. When this node is active it
// returns (true, redirectAddr, clusterAddr). Otherwise it reads the HA lock
// value to find the leader's UUID and returns the cached advertisement for
// that UUID, refreshing the cache from the barrier when the UUID changed or
// nothing is cached yet. If nobody holds the lock, or no advertisement entry
// exists, it returns empty addresses and a nil error.
func (c *Core) Leader() (isLeader bool, leaderAddr, clusterAddr string, err error) {
	// Check if HA enabled. We don't need the lock for this check as it's set
	// on startup and never modified
	if c.ha == nil {
		return false, "", "", ErrHANotEnabled
	}

	// Check if sealed
	if c.Sealed() {
		return false, "", "", consts.ErrSealed
	}

	c.stateLock.RLock()

	// Check if we are the leader
	if !c.standby {
		c.stateLock.RUnlock()
		return true, c.redirectAddr, c.clusterAddr, nil
	}

	// Initialize a lock
	lock, err := c.ha.LockWith(coreLockPath, "read")
	if err != nil {
		c.stateLock.RUnlock()
		return false, "", "", err
	}

	// Read the value
	held, leaderUUID, err := lock.Value()
	if err != nil {
		c.stateLock.RUnlock()
		return false, "", "", err
	}
	if !held {
		c.stateLock.RUnlock()
		return false, "", "", nil
	}

	c.clusterLeaderParamsLock.RLock()
	localLeaderUUID := c.clusterLeaderUUID
	localRedirAddr := c.clusterLeaderRedirectAddr
	localClusterAddr := c.clusterLeaderClusterAddr
	c.clusterLeaderParamsLock.RUnlock()

	// If the leader hasn't changed, return the cached value; nothing changes
	// mid-leadership, and the barrier caches anyways
	if leaderUUID == localLeaderUUID && localRedirAddr != "" {
		c.stateLock.RUnlock()
		return false, localRedirAddr, localClusterAddr, nil
	}

	c.logger.Trace("found new active node information, refreshing")

	defer c.stateLock.RUnlock()
	c.clusterLeaderParamsLock.Lock()
	defer c.clusterLeaderParamsLock.Unlock()

	// Validate base conditions again. If they hold now, another caller
	// refreshed the cache between our read-unlock and write-lock, so the
	// snapshots taken above are stale (that is why we got here); return the
	// current cached values instead of the snapshots.
	if leaderUUID == c.clusterLeaderUUID && c.clusterLeaderRedirectAddr != "" {
		return false, c.clusterLeaderRedirectAddr, c.clusterLeaderClusterAddr, nil
	}

	key := coreLeaderPrefix + leaderUUID
	// Use background because postUnseal isn't run on standby
	entry, err := c.barrier.Get(context.Background(), key)
	if err != nil {
		return false, "", "", err
	}
	if entry == nil {
		return false, "", "", nil
	}

	var oldAdv bool

	var adv activeAdvertisement
	err = jsonutil.DecodeJSON(entry.Value, &adv)
	if err != nil {
		// Fall back to pre-struct handling
		adv.RedirectAddr = string(entry.Value)
		c.logger.Debug("parsed redirect addr for new active node", "redirect_addr", adv.RedirectAddr)
		oldAdv = true
	}

	if !oldAdv {
		c.logger.Debug("parsing information for new active node", "active_cluster_addr", adv.ClusterAddr, "active_redirect_addr", adv.RedirectAddr)

		// Ensure we are using current values
		err = c.loadLocalClusterTLS(adv)
		if err != nil {
			return false, "", "", err
		}

		// This will ensure that we both have a connection at the ready and that
		// the address is the current known value
		// Since this is standby, we don't use the active context. Later we may
		// use a process-scoped context
		err = c.refreshRequestForwardingConnection(context.Background(), adv.ClusterAddr)
		if err != nil {
			return false, "", "", err
		}
	}

	// Don't set these until everything has been parsed successfully or we'll
	// never try again
	c.clusterLeaderRedirectAddr = adv.RedirectAddr
	c.clusterLeaderClusterAddr = adv.ClusterAddr
	c.clusterLeaderUUID = leaderUUID

	return false, adv.RedirectAddr, adv.ClusterAddr, nil
}

// StepDown is used to step down from leadership.
//
// The request is audited and authorized (root privileges are required) before
// a signal is queued on manualStepDownCh. It is a no-op returning nil when the
// core is sealed, HA is not enabled, or this node is already a standby. The
// state read lock is held for the whole call and released only by the
// deferred RUnlock; no return path may unlock it explicitly, since a second
// RUnlock is a fatal runtime error.
func (c *Core) StepDown(httpCtx context.Context, req *logical.Request) (retErr error) {
	defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())

	if req == nil {
		retErr = multierror.Append(retErr, errors.New("nil request to step-down"))
		return retErr
	}

	c.stateLock.RLock()
	defer c.stateLock.RUnlock()

	if c.Sealed() {
		return nil
	}
	if c.ha == nil || c.standby {
		return nil
	}

	// Derive a context that is cancelled when either the active context or
	// the incoming HTTP request context is done.
	ctx, cancel := context.WithCancel(c.activeContext)
	defer cancel()

	go func() {
		select {
		case <-ctx.Done():
		case <-httpCtx.Done():
			cancel()
		}
	}()

	acl, te, entity, identityPolicies, err := c.fetchACLTokenEntryAndEntity(req)
	if err != nil {
		retErr = multierror.Append(retErr, err)
		return retErr
	}

	// Audit-log the request before going any further
	auth := &logical.Auth{
		ClientToken:      req.ClientToken,
		Policies:         identityPolicies,
		IdentityPolicies: identityPolicies,
	}
	if te != nil {
		auth.TokenPolicies = te.Policies
		auth.Policies = append(te.Policies, identityPolicies...)
		auth.Metadata = te.Meta
		auth.DisplayName = te.DisplayName
		auth.EntityID = te.EntityID
	}

	logInput := &audit.LogInput{
		Auth:    auth,
		Request: req,
	}
	if err := c.auditBroker.LogRequest(ctx, logInput, c.auditedHeaders); err != nil {
		c.logger.Error("failed to audit request", "request_path", req.Path, "error", err)
		retErr = multierror.Append(retErr, errors.New("failed to audit request, cannot continue"))
		return retErr
	}

	if entity != nil && entity.Disabled {
		c.logger.Warn("permission denied as the entity on the token is disabled")
		retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
		return retErr
	}

	if te != nil && te.EntityID != "" && entity == nil {
		c.logger.Warn("permission denied as the entity on the token is invalid")
		retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
		return retErr
	}

	// Attempt to use the token (decrement num_uses)
	if te != nil {
		te, err = c.tokenStore.UseToken(ctx, te)
		if err != nil {
			c.logger.Error("failed to use token", "error", err)
			retErr = multierror.Append(retErr, ErrInternalError)
			return retErr
		}
		if te == nil {
			// Token has been revoked
			retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
			return retErr
		}
	}

	// Verify that this operation is allowed
	authResults := c.performPolicyChecks(ctx, acl, te, req, entity, &PolicyCheckOpts{
		RootPrivsRequired: true,
	})
	if authResults.Error.ErrorOrNil() != nil {
		retErr = multierror.Append(retErr, authResults.Error)
		return retErr
	}
	if !authResults.Allowed {
		retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
		return retErr
	}

	if te != nil && te.NumUses == tokenRevocationPending {
		// Token needs to be revoked. We do this immediately here because
		// we won't have a token store after sealing.
		leaseID, err := c.expiration.CreateOrFetchRevocationLeaseByToken(c.activeContext, te)
		if err == nil {
			err = c.expiration.Revoke(c.activeContext, leaseID)
		}
		if err != nil {
			c.logger.Error("token needed revocation before step-down but failed to revoke", "error", err)
			retErr = multierror.Append(retErr, ErrInternalError)
		}
	}

	// Non-blocking send: if a step-down is already queued, just log it.
	select {
	case c.manualStepDownCh <- struct{}{}:
	default:
		c.logger.Warn("manual step-down operation already queued")
	}

	return retErr
}

// runStandby is a long running process that manages a number of the HA
// subsystems. It runs the key-rotation checker, the leader refresher and the
// leadership waiter as a run.Group, together with an actor that returns when
// stopCh is closed. doneCh and manualStepDownCh are closed when it returns.
func (c *Core) runStandby(doneCh, manualStepDownCh, stopCh chan struct{}) {
	defer close(doneCh)
	defer close(manualStepDownCh)

	c.logger.Info("entering standby mode")

	var g run.Group
	{
		// This will cause all the other actors to close when the stop channel
		// is closed.
		g.Add(func() error {
			<-stopCh
			return nil
		}, func(error) {})
	}
	{
		// Monitor for key rotation
		keyRotateDone := make(chan struct{})
		keyRotateStop := make(chan struct{})

		g.Add(func() error {
			c.periodicCheckKeyUpgrade(context.Background(), keyRotateDone, keyRotateStop)
			return nil
		}, func(error) {
			close(keyRotateStop)
			c.logger.Debug("shutting down periodic key rotation checker")
			<-keyRotateDone
		})
	}
	{
		// Monitor for new leadership
		checkLeaderDone := make(chan struct{})
		checkLeaderStop := make(chan struct{})

		g.Add(func() error {
			c.periodicLeaderRefresh(checkLeaderDone, checkLeaderStop)
			return nil
		}, func(error) {
			close(checkLeaderStop)
			c.logger.Debug("shutting down periodic leader refresh")
			<-checkLeaderDone
		})
	}
	{
		// Wait for leadership
		leaderDoneCh := make(chan struct{})
		leaderStopCh := make(chan struct{})

		g.Add(func() error {
			c.waitForLeadership(leaderDoneCh, manualStepDownCh, leaderStopCh)
			return nil
		}, func(error) {
			close(leaderStopCh)
			c.logger.Debug("shutting down leader elections")
			<-leaderDoneCh
		})
	}

	// Start all the actors
	g.Run()
}

// waitForLeadership is a long running routine that is used when an HA backend
// is enabled. It waits until we are leader and switches this Vault to
// active. Each loop iteration acquires the HA lock, performs active-node
// setup under the state lock, then blocks until leadership is lost, a manual
// step-down is requested, or stopCh is closed. doneCh is closed on return.
func (c *Core) waitForLeadership(doneCh, manualStepDownCh, stopCh chan struct{}) {
	defer close(doneCh)

	c.logger.Info("entering standby mode")

	var manualStepDown bool
	for {
		// Check for a shutdown
		select {
		case <-stopCh:
			c.logger.Debug("stop channel triggered in runStandby")
			return
		default:
			// If we've just stepped down, we could instantly grab the lock
			// again. Give the other nodes a chance.
			if manualStepDown {
				time.Sleep(manualStepDownSleepPeriod)
				manualStepDown = false
			}
		}

		// Create a lock
		uuid, err := uuid.GenerateUUID()
		if err != nil {
			c.logger.Error("failed to generate uuid", "error", err)
			return
		}
		lock, err := c.ha.LockWith(coreLockPath, uuid)
		if err != nil {
			c.logger.Error("failed to create lock", "error", err)
			return
		}

		// Attempt the acquisition
		leaderLostCh := c.acquireLock(lock, stopCh)

		// Bail if we are being shutdown
		if leaderLostCh == nil {
			return
		}
		c.logger.Info("acquired lock, enabling active operation")

		// This is used later to log a metrics event; this can be helpful to
		// detect flapping
		activeTime := time.Now()

		// Grab the lock as we need it for cluster setup, which needs to happen
		// before advertising;
		lockGrabbedCh := make(chan struct{})
		go func() {
			// Grab the lock
			c.stateLock.Lock()
			// If stopCh has been closed, which only happens while the
			// stateLock is held, we have actually terminated, so we just
			// instantly give up the lock, otherwise we notify that it's ready
			// for consumption
			select {
			case <-stopCh:
				c.stateLock.Unlock()
			default:
				close(lockGrabbedCh)
			}
		}()

		select {
		case <-stopCh:
			lock.Unlock()
			metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
			return
		case <-lockGrabbedCh:
			// We now have the lock and can use it
		}

		if c.Sealed() {
			c.logger.Warn("grabbed HA lock but already sealed, exiting")
			lock.Unlock()
			c.stateLock.Unlock()
			metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
			return
		}

		// Store the lock so that we can manually clear it later if needed
		c.heldHALock = lock

		// Create the active context
		activeCtx, activeCtxCancel := context.WithCancel(context.Background())
		c.activeContext = activeCtx
		c.activeContextCancelFunc.Store(activeCtxCancel)

		// This block is used to wipe barrier/seal state and verify that
		// everything is sane. If we have no sanity in the barrier, we actually
		// seal, as there's little we can do.
		{
			c.seal.SetBarrierConfig(activeCtx, nil)
			if c.seal.RecoveryKeySupported() {
				c.seal.SetRecoveryConfig(activeCtx, nil)
			}

			if err := c.performKeyUpgrades(activeCtx); err != nil {
				// We call this in a goroutine so that we can give up the
				// statelock and have this shut us down; sealInternal has a
				// workflow where it watches for the stopCh to close so we want
				// to return from here
				c.logger.Error("error performing key upgrades", "error", err)
				go c.Shutdown()
				c.heldHALock = nil
				lock.Unlock()
				c.stateLock.Unlock()
				metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
				return
			}
		}

		{
			// Clear previous local cluster cert info so we generate new. Since the
			// UUID will have changed, standbys will know to look for new info
			c.localClusterParsedCert.Store((*x509.Certificate)(nil))
			c.localClusterCert.Store(([]byte)(nil))
			c.localClusterPrivateKey.Store((*ecdsa.PrivateKey)(nil))

			if err := c.setupCluster(activeCtx); err != nil {
				c.heldHALock = nil
				lock.Unlock()
				c.stateLock.Unlock()
				c.logger.Error("cluster setup failed", "error", err)
				metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
				continue
			}
		}

		// Advertise as leader
		if err := c.advertiseLeader(activeCtx, uuid, leaderLostCh); err != nil {
			c.heldHALock = nil
			lock.Unlock()
			c.stateLock.Unlock()
			c.logger.Error("leader advertisement setup failed", "error", err)
			metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
			continue
		}

		// Attempt the post-unseal process
		err = c.postUnseal(activeCtx, activeCtxCancel)
		if err == nil {
			c.standby = false
		}

		c.stateLock.Unlock()

		// Handle a failure to unseal
		if err != nil {
			c.logger.Error("post-unseal setup failed", "error", err)
			lock.Unlock()
			metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
			continue
		}

		// cancelCtxAndLock takes the state write lock and cancels the active
		// context. While it waits for the lock, a helper goroutine cancels
		// the context anyway once DefaultMaxRequestDuration has elapsed.
		cancelCtxAndLock := func() {
			go func() {
				select {
				case <-activeCtx.Done():
				// Attempt to drain any inflight requests
				case <-time.After(DefaultMaxRequestDuration):
					activeCtxCancel()
				}
			}()

			c.stateLock.Lock()
			activeCtxCancel()
		}

		// runSealing records the leadership-lost metric, flips back to
		// standby and runs the pre-seal teardown.
		runSealing := func() {
			metrics.MeasureSince([]string{"core", "leadership_lost"}, activeTime)

			c.standby = true

			if err := c.preSeal(); err != nil {
				c.logger.Error("pre-seal teardown failed", "error", err)
			}
		}

		// releaseHALock clears our leader advertisement and gives up the HA
		// lock, unless keepHALockOnStepDown is set.
		releaseHALock := func() {
			// We may hit this from leaderLostCh or manualStepDownCh if they
			// triggered before stopCh, so we check here instead of only in the
			// stopCh case so we can try to do the right thing then, too
			if atomic.LoadUint32(c.keepHALockOnStepDown) == 1 {
				return
			}
			if err := c.clearLeader(uuid); err != nil {
				c.logger.Error("clearing leader advertisement failed", "error", err)
			}
			c.heldHALock.Unlock()
			c.heldHALock = nil
		}

		// Monitor a loss of leadership
		select {
		case <-leaderLostCh:
			c.logger.Warn("leadership lost, stopping active operation")

			cancelCtxAndLock()
			runSealing()
			releaseHALock()
			c.stateLock.Unlock()

		case <-stopCh:
			// No state lock is taken on this path; per the note above, stopCh
			// is only closed while the stateLock is already held.
			activeCtxCancel()
			runSealing()
			releaseHALock()
			return

		case <-manualStepDownCh:
			manualStepDown = true
			c.logger.Warn("stepping down from active operation to standby")

			cancelCtxAndLock()
			runSealing()
			releaseHALock()
			c.stateLock.Unlock()
		}
	}
}

// This checks the leader periodically to ensure that we switch RPC to a new
// leader pretty quickly. There is logic in Leader() already to not make this
// onerous and avoid more traffic than needed, so we just call that and ignore
// the result. At most one refresh is in flight at a time; ticks that arrive
// while one is still running are skipped. doneCh is closed on return.
func (c *Core) periodicLeaderRefresh(doneCh, stopCh chan struct{}) {
	defer close(doneCh)
	opCount := new(int32)
	for {
		select {
		case <-time.After(leaderCheckInterval):
			count := atomic.AddInt32(opCount, 1)
			if count > 1 {
				atomic.AddInt32(opCount, -1)
				continue
			}
			// We do this in a goroutine because otherwise if this refresh is
			// called while we're shutting down the call to Leader() can
			// deadlock, which then means stopCh can never be seen and we can
			// block shutdown
			go func() {
				// Bind locally, as the race detector is tripping here
				lopCount := opCount
				c.Leader()
				atomic.AddInt32(lopCount, -1)
			}()
		case <-stopCh:
			return
		}
	}
}

// periodicCheckKeyUpgrade is used to watch for key rotation events as a
// standby. At most one check is in flight at a time; ticks that arrive while
// one is still running are skipped. doneCh is closed on return.
func (c *Core) periodicCheckKeyUpgrade(ctx context.Context, doneCh, stopCh chan struct{}) {
	defer close(doneCh)
	opCount := new(int32)
	for {
		select {
		case <-time.After(keyRotateCheckInterval):
			count := atomic.AddInt32(opCount, 1)
			if count > 1 {
				atomic.AddInt32(opCount, -1)
				continue
			}

			go func() {
				// Bind locally, as the race detector is tripping here
				lopCount := opCount

				// Only check if we are a standby
				c.stateLock.RLock()
				standby := c.standby
				c.stateLock.RUnlock()
				if !standby {
					atomic.AddInt32(lopCount, -1)
					return
				}

				// Check for a poison pill. If we can read it, it means we have stale
				// keys (e.g. from replication being activated) and we need to seal to
				// be unsealed again.
				entry, _ := c.barrier.Get(ctx, poisonPillPath)
				if entry != nil && len(entry.Value) > 0 {
					c.logger.Warn("encryption keys have changed out from underneath us (possibly due to replication enabling), must be unsealed again")
					go c.Shutdown()
					atomic.AddInt32(lopCount, -1)
					return
				}

				if err := c.checkKeyUpgrades(ctx); err != nil {
					c.logger.Error("key rotation periodic upgrade check failed", "error", err)
				}

				atomic.AddInt32(lopCount, -1)
				return
			}()
		case <-stopCh:
			return
		}
	}
}

// checkKeyUpgrades is used to check if there have been any key rotations
// and if there is a chain of upgrades available. It keeps calling the
// barrier's CheckUpgrade until no further upgrade is reported or an error
// occurs.
func (c *Core) checkKeyUpgrades(ctx context.Context) error {
	for {
		// Check for an upgrade
		didUpgrade, newTerm, err := c.barrier.CheckUpgrade(ctx)
		if err != nil {
			return err
		}

		// Nothing to do if no upgrade
		if !didUpgrade {
			break
		}
		if c.logger.IsInfo() {
			c.logger.Info("upgraded to new key term", "term", newTerm)
		}
	}
	return nil
}

// performKeyUpgrades applies any pending key upgrades, reloads the master key
// and the keyring from the barrier, and schedules cleanup of upgrade entries.
// The first failing step aborts the sequence and its error is returned
// wrapped with a description of that step.
func (c *Core) performKeyUpgrades(ctx context.Context) error {
	if err := c.checkKeyUpgrades(ctx); err != nil {
		return errwrap.Wrapf("error checking for key upgrades: {{err}}", err)
	}

	if err := c.barrier.ReloadMasterKey(ctx); err != nil {
		return errwrap.Wrapf("error reloading master key: {{err}}", err)
	}

	if err := c.barrier.ReloadKeyring(ctx); err != nil {
		return errwrap.Wrapf("error reloading keyring: {{err}}", err)
	}

	if err := c.scheduleUpgradeCleanup(ctx); err != nil {
		return errwrap.Wrapf("error scheduling upgrade cleanup: {{err}}", err)
	}

	return nil
}

// scheduleUpgradeCleanup is used to ensure that all the upgrade paths
// are cleaned up in a timely manner if a leader failover takes place. The
// upgrade entries listed now are deleted after keyRotateGracePeriod, unless
// the barrier is sealed (or its status cannot be read) at that time.
func (c *Core) scheduleUpgradeCleanup(ctx context.Context) error {
	// List the upgrades
	upgrades, err := c.barrier.List(ctx, keyringUpgradePrefix)
	if err != nil {
		return errwrap.Wrapf("failed to list upgrades: {{err}}", err)
	}

	// Nothing to do if no upgrades
	if len(upgrades) == 0 {
		return nil
	}

	// Schedule cleanup for all of them
	time.AfterFunc(keyRotateGracePeriod, func() {
		sealed, err := c.barrier.Sealed()
		if err != nil {
			c.logger.Warn("failed to check barrier status at upgrade cleanup time")
			return
		}
		if sealed {
			c.logger.Warn("barrier sealed at upgrade cleanup time")
			return
		}
		for _, upgrade := range upgrades {
			path := fmt.Sprintf("%s%s", keyringUpgradePrefix, upgrade)
			if err := c.barrier.Delete(ctx, path); err != nil {
				c.logger.Error("failed to cleanup upgrade", "path", path, "error", err)
			}
		}
	})
	return nil
}

// acquireLock blocks until the lock is acquired, returning the leaderLostCh.
// A failed attempt is logged and retried after lockRetryInterval. It returns
// nil if stopCh fires while waiting to retry.
func (c *Core) acquireLock(lock physical.Lock, stopCh <-chan struct{}) <-chan struct{} {
	for {
		// Attempt lock acquisition
		leaderLostCh, err := lock.Lock(stopCh)
		if err == nil {
			return leaderLostCh
		}

		// Retry the acquisition
		c.logger.Error("failed to acquire lock", "error", err)
		select {
		case <-time.After(lockRetryInterval):
		case <-stopCh:
			return nil
		}
	}
}

// advertiseLeader is used to advertise the current node as leader. It starts
// a background cleanup of other leader entries, writes this node's redirect
// address, cluster address, cluster certificate and key parameters to the
// barrier under coreLeaderPrefix+uuid, and notifies service discovery when
// the HA backend supports it (a notification failure is only logged).
func (c *Core) advertiseLeader(ctx context.Context, uuid string, leaderLostCh <-chan struct{}) error {
	go c.cleanLeaderPrefix(ctx, uuid, leaderLostCh)

	var key *ecdsa.PrivateKey
	switch c.localClusterPrivateKey.Load().(type) {
	case *ecdsa.PrivateKey:
		key = c.localClusterPrivateKey.Load().(*ecdsa.PrivateKey)
	default:
		c.logger.Error("unknown cluster private key type", "key_type", fmt.Sprintf("%T", c.localClusterPrivateKey.Load()))
		return fmt.Errorf("unknown cluster private key type %T", c.localClusterPrivateKey.Load())
	}

	keyParams := &clusterKeyParams{
		Type: corePrivateKeyTypeP521,
		X:    key.X,
		Y:    key.Y,
		D:    key.D,
	}

	// Copy the certificate bytes so the advertisement does not alias the
	// stored slice.
	locCert := c.localClusterCert.Load().([]byte)
	localCert := make([]byte, len(locCert))
	copy(localCert, locCert)

	adv := &activeAdvertisement{
		RedirectAddr:     c.redirectAddr,
		ClusterAddr:      c.clusterAddr,
		ClusterCert:      localCert,
		ClusterKeyParams: keyParams,
	}
	val, err := jsonutil.EncodeJSON(adv)
	if err != nil {
		return err
	}
	ent := &Entry{
		Key:   coreLeaderPrefix + uuid,
		Value: val,
	}
	err = c.barrier.Put(ctx, ent)
	if err != nil {
		return err
	}

	sd, ok := c.ha.(physical.ServiceDiscovery)
	if ok {
		if err := sd.NotifyActiveStateChange(); err != nil {
			if c.logger.IsWarn() {
				c.logger.Warn("failed to notify active status", "error", err)
			}
		}
	}
	return nil
}

// cleanLeaderPrefix lists the entries under coreLeaderPrefix and deletes them
// one at a time, waiting leaderPrefixCleanDelay before each, skipping the
// entry that matches uuid. It stops early when leaderLostCh fires. Delete
// errors are ignored.
func (c *Core) cleanLeaderPrefix(ctx context.Context, uuid string, leaderLostCh <-chan struct{}) {
	keys, err := c.barrier.List(ctx, coreLeaderPrefix)
	if err != nil {
		c.logger.Error("failed to list entries in core/leader", "error", err)
		return
	}
	for len(keys) > 0 {
		select {
		case <-time.After(leaderPrefixCleanDelay):
			if keys[0] != uuid {
				c.barrier.Delete(ctx, coreLeaderPrefix+keys[0])
			}
			keys = keys[1:]
		case <-leaderLostCh:
			return
		}
	}
}

// clearLeader is used to clear our leadership entry. It deletes
// coreLeaderPrefix+uuid from the barrier, then notifies service discovery of
// the state change when the HA backend supports it. The returned error is the
// one from the delete; a notification failure is only logged.
func (c *Core) clearLeader(uuid string) error {
	key := coreLeaderPrefix + uuid
	err := c.barrier.Delete(context.Background(), key)

	// Advertise ourselves as a standby
	sd, ok := c.ha.(physical.ServiceDiscovery)
	if ok {
		if err := sd.NotifyActiveStateChange(); err != nil {
			if c.logger.IsWarn() {
				c.logger.Warn("failed to notify standby status", "error", err)
			}
		}
	}

	return err
}