package vault import ( "bytes" "errors" "fmt" "log" "net/url" "os" "sync" "time" "github.com/armon/go-metrics" "github.com/hashicorp/errwrap" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/audit" "github.com/hashicorp/vault/helper/mlock" "github.com/hashicorp/vault/logical" "github.com/hashicorp/vault/physical" "github.com/hashicorp/vault/shamir" ) const ( // coreLockPath is the path used to acquire a coordinating lock // for a highly-available deploy. coreLockPath = "core/lock" // coreLeaderPrefix is the prefix used for the UUID that contains // the currently elected leader. coreLeaderPrefix = "core/leader/" // lockRetryInterval is the interval we re-attempt to acquire the // HA lock if an error is encountered lockRetryInterval = 10 * time.Second // keyRotateCheckInterval is how often a standby checks for a key // rotation taking place. keyRotateCheckInterval = 30 * time.Second // keyRotateGracePeriod is how long we allow an upgrade path // for standby instances before we delete the upgrade keys keyRotateGracePeriod = 2 * time.Minute // leaderPrefixCleanDelay is how long to wait between deletions // of orphaned leader keys, to prevent slamming the backend. leaderPrefixCleanDelay = 200 * time.Millisecond // manualStepDownSleepPeriod is how long to sleep after a user-initiated // step down of the active node, to prevent instantly regrabbing the lock manualStepDownSleepPeriod = 10 * time.Second ) var ( // ErrSealed is returned if an operation is performed on // a sealed barrier. No operation is expected to succeed before unsealing ErrSealed = errors.New("Vault is sealed") // ErrStandby is returned if an operation is performed on // a standby Vault. No operation is expected to succeed until active. ErrStandby = errors.New("Vault is in standby mode") // ErrAlreadyInit is returned if the core is already // initialized. This prevents a re-initialization. ErrAlreadyInit = errors.New("Vault is already initialized") // ErrNotInit is returned if a non-initialized barrier // is attempted to be unsealed. ErrNotInit = errors.New("Vault is not initialized") // ErrInternalError is returned when we don't want to leak // any information about an internal error ErrInternalError = errors.New("internal error") // ErrHANotEnabled is returned if the operation only makes sense // in an HA setting ErrHANotEnabled = errors.New("Vault is not configured for highly-available mode") ) // NonFatalError is an error that can be returned during NewCore that should be // displayed but not cause a program exit type NonFatalError struct { Err error } func (e *NonFatalError) WrappedErrors() []error { return []error{e.Err} } func (e *NonFatalError) Error() string { return e.Err.Error() } // ErrInvalidKey is returned if there is an error with a // provided unseal key. type ErrInvalidKey struct { Reason string } func (e *ErrInvalidKey) Error() string { return fmt.Sprintf("invalid key: %v", e.Reason) } // Core is used as the central manager of Vault activity. It is the primary point of // interface for API handlers and is responsible for managing the logical and physical // backends, router, security barrier, and audit trails. type Core struct { // HABackend may be available depending on the physical backend ha physical.HABackend // AdvertiseAddr is the address we advertise as leader if held advertiseAddr string // physical backend is the un-trusted backend with durable data physical physical.Backend // Our Seal, for seal configuration information seal Seal // barrier is the security barrier wrapping the physical backend barrier SecurityBarrier // router is responsible for managing the mount points for logical backends. router *Router // logicalBackends is the mapping of backends to use for this core logicalBackends map[string]logical.Factory // credentialBackends is the mapping of backends to use for this core credentialBackends map[string]logical.Factory // auditBackends is the mapping of backends to use for this core auditBackends map[string]audit.Factory // stateLock protects mutable state stateLock sync.RWMutex sealed bool standby bool standbyDoneCh chan struct{} standbyStopCh chan struct{} manualStepDownCh chan struct{} // unlockParts has the keys provided to Unseal until // the threshold number of parts is available. unlockParts [][]byte // generateRootProgress holds the shares until we reach enough // to verify the master key generateRootConfig *GenerateRootConfig generateRootProgress [][]byte generateRootLock sync.Mutex // These variables holds the config and shares we have until we reach // enough to verify the appropriate master key. Note that the same lock is // used; this isn't time-critical so this shouldn't be a problem. barrierRekeyConfig *SealConfig barrierRekeyProgress [][]byte recoveryRekeyConfig *SealConfig recoveryRekeyProgress [][]byte rekeyLock sync.RWMutex // mounts is loaded after unseal since it is a protected // configuration mounts *MountTable // mountsLock is used to ensure that the mounts table does not // change underneath a calling function mountsLock sync.RWMutex // auth is loaded after unseal since it is a protected // configuration auth *MountTable // authLock is used to ensure that the auth table does not // change underneath a calling function authLock sync.RWMutex // audit is loaded after unseal since it is a protected // configuration audit *MountTable // auditLock is used to ensure that the audit table does not // change underneath a calling function auditLock sync.RWMutex // auditBroker is used to ingest the audit events and fan // out into the configured audit backends auditBroker *AuditBroker // systemBarrierView is the barrier view for the system backend systemBarrierView *BarrierView // expiration manager is used for managing LeaseIDs, // renewal, expiration and revocation expiration *ExpirationManager // rollback manager is used to run rollbacks periodically rollback *RollbackManager // policy store is used to manage named ACL policies policyStore *PolicyStore // token store is used to manage authentication tokens tokenStore *TokenStore // metricsCh is used to stop the metrics streaming metricsCh chan struct{} // metricsMutex is used to prevent a race condition between // metrics emission and sealing leading to a nil pointer metricsMutex sync.Mutex defaultLeaseTTL time.Duration maxLeaseTTL time.Duration logger *log.Logger } // CoreConfig is used to parameterize a core type CoreConfig struct { LogicalBackends map[string]logical.Factory CredentialBackends map[string]logical.Factory AuditBackends map[string]audit.Factory Physical physical.Backend HAPhysical physical.HABackend // May be nil, which disables HA operations Seal Seal Logger *log.Logger DisableCache bool // Disables the LRU cache on the physical backend DisableMlock bool // Disables mlock syscall CacheSize int // Custom cache size of zero for default AdvertiseAddr string // Set as the leader address for HA DefaultLeaseTTL time.Duration MaxLeaseTTL time.Duration } // NewCore is used to construct a new core func NewCore(conf *CoreConfig) (*Core, error) { if conf.HAPhysical != nil && conf.AdvertiseAddr == "" { return nil, fmt.Errorf("missing advertisement address") } if conf.DefaultLeaseTTL == 0 { conf.DefaultLeaseTTL = defaultLeaseTTL } if conf.MaxLeaseTTL == 0 { conf.MaxLeaseTTL = maxLeaseTTL } if conf.DefaultLeaseTTL > conf.MaxLeaseTTL { return nil, fmt.Errorf("cannot have DefaultLeaseTTL larger than MaxLeaseTTL") } // Validate the advertise addr if its given to us if conf.AdvertiseAddr != "" { u, err := url.Parse(conf.AdvertiseAddr) if err != nil { return nil, fmt.Errorf("advertisement address is not valid url: %s", err) } if u.Scheme == "" { return nil, fmt.Errorf("advertisement address must include scheme (ex. 'http')") } } // Wrap the backend in a cache unless disabled if !conf.DisableCache { _, isCache := conf.Physical.(*physical.Cache) _, isInmem := conf.Physical.(*physical.InmemBackend) if !isCache && !isInmem { cache := physical.NewCache(conf.Physical, conf.CacheSize) conf.Physical = cache } } if !conf.DisableMlock { // Ensure our memory usage is locked into physical RAM if err := mlock.LockMemory(); err != nil { return nil, fmt.Errorf( "Failed to lock memory: %v\n\n"+ "This usually means that the mlock syscall is not available.\n"+ "Vault uses mlock to prevent memory from being swapped to\n"+ "disk. This requires root privileges as well as a machine\n"+ "that supports mlock. Please enable mlock on your system or\n"+ "disable Vault from using it. To disable Vault from using it,\n"+ "set the `disable_mlock` configuration option in your configuration\n"+ "file.", err) } } // Construct a new AES-GCM barrier barrier, err := NewAESGCMBarrier(conf.Physical) if err != nil { return nil, fmt.Errorf("barrier setup failed: %v", err) } // Make a default logger if not provided if conf.Logger == nil { conf.Logger = log.New(os.Stderr, "", log.LstdFlags) } // Setup the core c := &Core{ ha: conf.HAPhysical, advertiseAddr: conf.AdvertiseAddr, physical: conf.Physical, seal: conf.Seal, barrier: barrier, router: NewRouter(), sealed: true, standby: true, logger: conf.Logger, defaultLeaseTTL: conf.DefaultLeaseTTL, maxLeaseTTL: conf.MaxLeaseTTL, } // Setup the backends logicalBackends := make(map[string]logical.Factory) for k, f := range conf.LogicalBackends { logicalBackends[k] = f } _, ok := logicalBackends["generic"] if !ok { logicalBackends["generic"] = PassthroughBackendFactory } logicalBackends["cubbyhole"] = CubbyholeBackendFactory logicalBackends["system"] = func(config *logical.BackendConfig) (logical.Backend, error) { return NewSystemBackend(c, config), nil } c.logicalBackends = logicalBackends credentialBackends := make(map[string]logical.Factory) for k, f := range conf.CredentialBackends { credentialBackends[k] = f } credentialBackends["token"] = func(config *logical.BackendConfig) (logical.Backend, error) { return NewTokenStore(c, config) } c.credentialBackends = credentialBackends auditBackends := make(map[string]audit.Factory) for k, f := range conf.AuditBackends { auditBackends[k] = f } c.auditBackends = auditBackends if c.seal == nil { c.seal = &DefaultSeal{} } c.seal.SetCore(c) // Attempt unsealing with stored keys; if there are no stored keys this // returns nil, otherwise returns nil or an error storedKeyErr := c.UnsealWithStoredKeys() return c, storedKeyErr } // Shutdown is invoked when the Vault instance is about to be terminated. It // should not be accessible as part of an API call as it will cause an availability // problem. It is only used to gracefully quit in the case of HA so that failover // happens as quickly as possible. func (c *Core) Shutdown() error { c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } // Seal the Vault, causes a leader stepdown return c.sealInternal() } func (c *Core) fetchACLandTokenEntry(req *logical.Request) (*ACL, *TokenEntry, error) { defer metrics.MeasureSince([]string{"core", "fetch_acl_and_token"}, time.Now()) // Ensure there is a client token if req.ClientToken == "" { return nil, nil, fmt.Errorf("missing client token") } if c.tokenStore == nil { c.logger.Printf("[ERR] core: token store is unavailable") return nil, nil, ErrInternalError } // Resolve the token policy te, err := c.tokenStore.Lookup(req.ClientToken) if err != nil { c.logger.Printf("[ERR] core: failed to lookup token: %v", err) return nil, nil, ErrInternalError } // Ensure the token is valid if te == nil { return nil, nil, logical.ErrPermissionDenied } // Construct the corresponding ACL object acl, err := c.policyStore.ACL(te.Policies...) if err != nil { c.logger.Printf("[ERR] core: failed to construct ACL: %v", err) return nil, nil, ErrInternalError } return acl, te, nil } func (c *Core) checkToken(req *logical.Request) (*logical.Auth, *TokenEntry, error) { defer metrics.MeasureSince([]string{"core", "check_token"}, time.Now()) acl, te, err := c.fetchACLandTokenEntry(req) if err != nil { return nil, te, err } // Check if this is a root protected path rootPath := c.router.RootPath(req.Path) // When we receive a write of either type, rather than require clients to // PUT/POST and trust the operation, we ask the backend to give us the real // skinny -- if the backend implements an existence check, it can tell us // whether a particular resource exists. Then we can mark it as an update // or creation as appropriate. if req.Operation == logical.CreateOperation || req.Operation == logical.UpdateOperation { checkExists, resourceExists, err := c.router.RouteExistenceCheck(req) switch err { case logical.ErrUnsupportedPath: // fail later via bad path to avoid confusing items in the log checkExists = false case nil: // Continue on default: c.logger.Printf("[ERR] core: failed to run existence check: %v", err) return nil, nil, ErrInternalError } switch { case checkExists == false: // No existence check, so always treate it as an update operation, which is how it is pre 0.5 req.Operation = logical.UpdateOperation case resourceExists == true: // It exists, so force an update operation req.Operation = logical.UpdateOperation case resourceExists == false: // It doesn't exist, force a create operation req.Operation = logical.CreateOperation default: panic("unreachable code") } } // Check the standard non-root ACLs. Return the token entry if it's not // allowed so we can decrement the use count. allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) if !allowed { return nil, te, logical.ErrPermissionDenied } if rootPath && !rootPrivs { return nil, te, logical.ErrPermissionDenied } // Create the auth response auth := &logical.Auth{ ClientToken: req.ClientToken, Policies: te.Policies, Metadata: te.Meta, DisplayName: te.DisplayName, } return auth, te, nil } // Sealed checks if the Vault is current sealed func (c *Core) Sealed() (bool, error) { c.stateLock.RLock() defer c.stateLock.RUnlock() return c.sealed, nil } // Standby checks if the Vault is in standby mode func (c *Core) Standby() (bool, error) { c.stateLock.RLock() defer c.stateLock.RUnlock() return c.standby, nil } // Leader is used to get the current active leader func (c *Core) Leader() (bool, string, error) { c.stateLock.RLock() defer c.stateLock.RUnlock() // Check if HA enabled if c.ha == nil { return false, "", ErrHANotEnabled } // Check if sealed if c.sealed { return false, "", ErrSealed } // Check if we are the leader if !c.standby { return true, c.advertiseAddr, nil } // Initialize a lock lock, err := c.ha.LockWith(coreLockPath, "read") if err != nil { return false, "", err } // Read the value held, value, err := lock.Value() if err != nil { return false, "", err } if !held { return false, "", nil } // Value is the UUID of the leader, fetch the key key := coreLeaderPrefix + value entry, err := c.barrier.Get(key) if err != nil { return false, "", err } if entry == nil { return false, "", nil } // Leader address is in the entry return false, string(entry.Value), nil } // SecretProgress returns the number of keys provided so far func (c *Core) SecretProgress() int { c.stateLock.RLock() defer c.stateLock.RUnlock() return len(c.unlockParts) } // ResetUnsealProcess removes the current unlock parts from memory, to reset // the unsealing process func (c *Core) ResetUnsealProcess() { c.stateLock.Lock() defer c.stateLock.Unlock() if !c.sealed { return } c.unlockParts = nil } // Unseal is used to provide one of the key parts to unseal the Vault. // // They key given as a parameter will automatically be zerod after // this method is done with it. If you want to keep the key around, a copy // should be made. func (c *Core) Unseal(key []byte) (bool, error) { defer metrics.MeasureSince([]string{"core", "unseal"}, time.Now()) // Verify the key length min, max := c.barrier.KeyLength() max += shamir.ShareOverhead if len(key) < min { return false, &ErrInvalidKey{fmt.Sprintf("key is shorter than minimum %d bytes", min)} } if len(key) > max { return false, &ErrInvalidKey{fmt.Sprintf("key is longer than maximum %d bytes", max)} } // Get the seal configuration config, err := c.seal.BarrierConfig() if err != nil { return false, err } // Ensure the barrier is initialized if config == nil { return false, ErrNotInit } c.stateLock.Lock() defer c.stateLock.Unlock() // Check if already unsealed if !c.sealed { return true, nil } // Check if we already have this piece for _, existing := range c.unlockParts { if bytes.Equal(existing, key) { return false, nil } } // Store this key c.unlockParts = append(c.unlockParts, key) // Check if we don't have enough keys to unlock if len(c.unlockParts) < config.SecretThreshold { c.logger.Printf("[DEBUG] core: cannot unseal, have %d of %d keys", len(c.unlockParts), config.SecretThreshold) return false, nil } // Recover the master key var masterKey []byte if config.SecretThreshold == 1 { masterKey = c.unlockParts[0] c.unlockParts = nil } else { masterKey, err = shamir.Combine(c.unlockParts) c.unlockParts = nil if err != nil { return false, fmt.Errorf("failed to compute master key: %v", err) } } defer memzero(masterKey) // Attempt to unlock if err := c.barrier.Unseal(masterKey); err != nil { return false, err } c.logger.Printf("[INFO] core: vault is unsealed") // Do post-unseal setup if HA is not enabled if c.ha == nil { if err := c.postUnseal(); err != nil { c.logger.Printf("[ERR] core: post-unseal setup failed: %v", err) c.barrier.Seal() c.logger.Printf("[WARN] core: vault is sealed") return false, err } c.standby = false } else { // Go to standby mode, wait until we are active to unseal c.standbyDoneCh = make(chan struct{}) c.standbyStopCh = make(chan struct{}) c.manualStepDownCh = make(chan struct{}) go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh) } // Success! c.sealed = false if c.ha != nil { sd, ok := c.ha.(physical.ServiceDiscovery) if ok { go func() { if err := sd.AdvertiseSealed(false); err != nil { c.logger.Printf("[WARN] core: failed to advertise unsealed status: %v", err) } }() } } return true, nil } // Seal is used to re-seal the Vault. This requires the Vault to // be unsealed again to perform any further operations. func (c *Core) Seal(token string) (retErr error) { defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, Path: "sys/seal", ClientToken: token, } acl, te, err := c.fetchACLandTokenEntry(req) if err != nil { // Since there is no token store in standby nodes, sealing cannot // be done. Ideally, the request has to be forwarded to leader node // for validation and the operation should be performed. But for now, // just returning with an error and recommending a vault restart, which // essentially does the same thing. if c.standby { c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead") return errors.New("vault cannot seal when in standby mode; please restart instead") } return err } // Attempt to use the token (decrement num_uses) // On error bail out; if the token has been revoked, bail out too if te != nil { te, err = c.tokenStore.UseToken(te) if err != nil { c.logger.Printf("[ERR] core: failed to use token: %v", err) return ErrInternalError } if te == nil { // Token is no longer valid return logical.ErrPermissionDenied } if te.NumUses == -1 { // Token needs to be revoked return c.tokenStore.Revoke(te.ID) } } // Verify that this operation is allowed allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) if !allowed { return logical.ErrPermissionDenied } // We always require root privileges for this operation if !rootPrivs { return logical.ErrPermissionDenied } //Seal the Vault err = c.sealInternal() if err == nil && retErr == ErrInternalError { c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") } else { retErr = err } return } // StepDown is used to step down from leadership func (c *Core) StepDown(token string) error { defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } if c.ha == nil || c.standby { return nil } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, Path: "sys/step-down", ClientToken: token, } acl, te, err := c.fetchACLandTokenEntry(req) if err != nil { return err } // Attempt to use the token (decrement num_uses) if te != nil { te, err = c.tokenStore.UseToken(te) if err != nil { c.logger.Printf("[ERR] core: failed to use token: %v", err) return err } if te == nil { // Token has been revoked return logical.ErrPermissionDenied } if te.NumUses == -1 { // Token needs to be revoked return c.tokenStore.Revoke(te.ID) } } // Verify that this operation is allowed allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path) if !allowed { return logical.ErrPermissionDenied } // We always require root privileges for this operation if !rootPrivs { return logical.ErrPermissionDenied } select { case c.manualStepDownCh <- struct{}{}: default: c.logger.Printf("[WARN] core: manual step-down operation already queued") } return nil } // sealInternal is an internal method used to seal the vault. It does not do // any authorization checking. The stateLock must be held prior to calling. func (c *Core) sealInternal() error { // Enable that we are sealed to prevent furthur transactions c.sealed = true // Do pre-seal teardown if HA is not enabled if c.ha == nil { if err := c.preSeal(); err != nil { c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err) return fmt.Errorf("internal error") } } else { // Signal the standby goroutine to shutdown, wait for completion close(c.standbyStopCh) // Release the lock while we wait to avoid deadlocking c.stateLock.Unlock() <-c.standbyDoneCh c.stateLock.Lock() } if err := c.barrier.Seal(); err != nil { return err } c.logger.Printf("[INFO] core: vault is sealed") if c.ha != nil { sd, ok := c.ha.(physical.ServiceDiscovery) if ok { go func() { if err := sd.AdvertiseSealed(true); err != nil { c.logger.Printf("[WARN] core: failed to advertise sealed status: %v", err) } }() } } return nil } // postUnseal is invoked after the barrier is unsealed, but before // allowing any user operations. This allows us to setup any state that // requires the Vault to be unsealed such as mount tables, logical backends, // credential stores, etc. func (c *Core) postUnseal() (retErr error) { defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now()) defer func() { if retErr != nil { c.preSeal() } }() c.logger.Printf("[INFO] core: post-unseal setup starting") if cache, ok := c.physical.(*physical.Cache); ok { cache.Purge() } // HA mode requires us to handle keyring rotation and rekeying if c.ha != nil { if err := c.checkKeyUpgrades(); err != nil { return err } if err := c.barrier.ReloadMasterKey(); err != nil { return err } if err := c.barrier.ReloadKeyring(); err != nil { return err } if err := c.scheduleUpgradeCleanup(); err != nil { return err } } if err := c.loadMounts(); err != nil { return err } if err := c.setupMounts(); err != nil { return err } if err := c.startRollback(); err != nil { return err } if err := c.setupPolicyStore(); err != nil { return err } if err := c.loadCredentials(); err != nil { return err } if err := c.setupCredentials(); err != nil { return err } if err := c.setupExpiration(); err != nil { return err } if err := c.loadAudits(); err != nil { return err } if err := c.setupAudits(); err != nil { return err } c.metricsCh = make(chan struct{}) go c.emitMetrics(c.metricsCh) c.logger.Printf("[INFO] core: post-unseal setup complete") return nil } // preSeal is invoked before the barrier is sealed, allowing // for any state teardown required. func (c *Core) preSeal() error { defer metrics.MeasureSince([]string{"core", "pre_seal"}, time.Now()) c.logger.Printf("[INFO] core: pre-seal teardown starting") // Clear any rekey progress c.barrierRekeyConfig = nil c.barrierRekeyProgress = nil c.recoveryRekeyConfig = nil c.recoveryRekeyProgress = nil if c.metricsCh != nil { close(c.metricsCh) c.metricsCh = nil } var result error if err := c.teardownAudits(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error tearing down audits: {{err}}", err)) } if err := c.stopExpiration(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error stopping expiration: {{err}}", err)) } if err := c.teardownCredentials(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error tearing down credentials: {{err}}", err)) } if err := c.teardownPolicyStore(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error tearing down policy store: {{err}}", err)) } if err := c.stopRollback(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error stopping rollback: {{err}}", err)) } if err := c.unloadMounts(); err != nil { result = multierror.Append(result, errwrap.Wrapf("[ERR] error unloading mounts: {{err}}", err)) } if cache, ok := c.physical.(*physical.Cache); ok { cache.Purge() } c.logger.Printf("[INFO] core: pre-seal teardown complete") return result } // runStandby is a long running routine that is used when an HA backend // is enabled. It waits until we are leader and switches this Vault to // active. func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) { defer close(doneCh) defer close(manualStepDownCh) c.logger.Printf("[INFO] core: entering standby mode") // Monitor for key rotation keyRotateDone := make(chan struct{}) keyRotateStop := make(chan struct{}) go c.periodicCheckKeyUpgrade(keyRotateDone, keyRotateStop) defer func() { close(keyRotateStop) <-keyRotateDone }() for { // Check for a shutdown select { case <-stopCh: return default: } // Create a lock uuid, err := uuid.GenerateUUID() if err != nil { c.logger.Printf("[ERR] core: failed to generate uuid: %v", err) return } lock, err := c.ha.LockWith(coreLockPath, uuid) if err != nil { c.logger.Printf("[ERR] core: failed to create lock: %v", err) return } // Attempt the acquisition leaderLostCh := c.acquireLock(lock, stopCh) // Bail if we are being shutdown if leaderLostCh == nil { return } c.logger.Printf("[INFO] core: acquired lock, enabling active operation") // Advertise ourself as leader if err := c.advertiseLeader(uuid, leaderLostCh); err != nil { c.logger.Printf("[ERR] core: leader advertisement setup failed: %v", err) lock.Unlock() continue } // Attempt the post-unseal process c.stateLock.Lock() err = c.postUnseal() if err == nil { c.standby = false } c.stateLock.Unlock() // Handle a failure to unseal if err != nil { c.logger.Printf("[ERR] core: post-unseal setup failed: %v", err) lock.Unlock() continue } // Monitor a loss of leadership var manualStepDown bool select { case <-leaderLostCh: c.logger.Printf("[WARN] core: leadership lost, stopping active operation") case <-stopCh: c.logger.Printf("[WARN] core: stopping active operation") case <-manualStepDownCh: c.logger.Printf("[WARN] core: stepping down from active operation to standby") manualStepDown = true } // Clear ourself as leader if err := c.clearLeader(uuid); err != nil { c.logger.Printf("[ERR] core: clearing leader advertisement failed: %v", err) } // Attempt the pre-seal process c.stateLock.Lock() c.standby = true preSealErr := c.preSeal() c.stateLock.Unlock() // Give up leadership lock.Unlock() // Check for a failure to prepare to seal if preSealErr != nil { c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err) } // If we've merely stepped down, we could instantly grab the lock // again. Give the other nodes a chance. if manualStepDown { time.Sleep(manualStepDownSleepPeriod) } } } // periodicCheckKeyUpgrade is used to watch for key rotation events as a standby func (c *Core) periodicCheckKeyUpgrade(doneCh, stopCh chan struct{}) { defer close(doneCh) for { select { case <-time.After(keyRotateCheckInterval): // Only check if we are a standby c.stateLock.RLock() standby := c.standby c.stateLock.RUnlock() if !standby { continue } if err := c.checkKeyUpgrades(); err != nil { c.logger.Printf("[ERR] core: key rotation periodic upgrade check failed: %v", err) } case <-stopCh: return } } } // checkKeyUpgrades is used to check if there have been any key rotations // and if there is a chain of upgrades available func (c *Core) checkKeyUpgrades() error { for { // Check for an upgrade didUpgrade, newTerm, err := c.barrier.CheckUpgrade() if err != nil { return err } // Nothing to do if no upgrade if !didUpgrade { break } c.logger.Printf("[INFO] core: upgraded to key term %d", newTerm) } return nil } // scheduleUpgradeCleanup is used to ensure that all the upgrade paths // are cleaned up in a timely manner if a leader failover takes place func (c *Core) scheduleUpgradeCleanup() error { // List the upgrades upgrades, err := c.barrier.List(keyringUpgradePrefix) if err != nil { return fmt.Errorf("failed to list upgrades: %v", err) } // Nothing to do if no upgrades if len(upgrades) == 0 { return nil } // Schedule cleanup for all of them time.AfterFunc(keyRotateGracePeriod, func() { for _, upgrade := range upgrades { path := fmt.Sprintf("%s%s", keyringUpgradePrefix, upgrade) if err := c.barrier.Delete(path); err != nil { c.logger.Printf("[ERR] core: failed to cleanup upgrade: %s", path) } } }) return nil } // acquireLock blocks until the lock is acquired, returning the leaderLostCh func (c *Core) acquireLock(lock physical.Lock, stopCh <-chan struct{}) <-chan struct{} { for { // Attempt lock acquisition leaderLostCh, err := lock.Lock(stopCh) if err == nil { return leaderLostCh } // Retry the acquisition c.logger.Printf("[ERR] core: failed to acquire lock: %v", err) select { case <-time.After(lockRetryInterval): case <-stopCh: return nil } } } // advertiseLeader is used to advertise the current node as leader func (c *Core) advertiseLeader(uuid string, leaderLostCh <-chan struct{}) error { go c.cleanLeaderPrefix(uuid, leaderLostCh) ent := &Entry{ Key: coreLeaderPrefix + uuid, Value: []byte(c.advertiseAddr), } err := c.barrier.Put(ent) if err != nil { return err } sd, ok := c.ha.(physical.ServiceDiscovery) if ok { go func() { if err := sd.AdvertiseActive(true); err != nil { c.logger.Printf("[WARN] core: failed to advertise active status: %v", err) } }() } return nil } func (c *Core) cleanLeaderPrefix(uuid string, leaderLostCh <-chan struct{}) { keys, err := c.barrier.List(coreLeaderPrefix) if err != nil { c.logger.Printf("[ERR] core: failed to list entries in core/leader: %v", err) return } for len(keys) > 0 { select { case <-time.After(leaderPrefixCleanDelay): if keys[0] != uuid { c.barrier.Delete(coreLeaderPrefix + keys[0]) } keys = keys[1:] case <-leaderLostCh: return } } } // clearLeader is used to clear our leadership entry func (c *Core) clearLeader(uuid string) error { key := coreLeaderPrefix + uuid err := c.barrier.Delete(key) // Advertise ourselves as a standby sd, ok := c.ha.(physical.ServiceDiscovery) if ok { go func() { if err := sd.AdvertiseActive(false); err != nil { c.logger.Printf("[WARN] core: failed to advertise standby status: %v", err) } }() } return err } // emitMetrics is used to periodically expose metrics while runnig func (c *Core) emitMetrics(stopCh chan struct{}) { for { select { case <-time.After(time.Second): c.metricsMutex.Lock() if c.expiration != nil { c.expiration.emitMetrics() } c.metricsMutex.Unlock() case <-stopCh: return } } } func (c *Core) SealAccess() *SealAccess { sa := &SealAccess{} sa.SetSeal(c.seal) return sa }