Merge pull request #1146 from hashicorp/step-down
Provide 'sys/step-down' and 'vault step-down'
This commit is contained in:
commit
3e7bca82a1
|
@ -0,0 +1,10 @@
|
|||
package api
|
||||
|
||||
func (c *Sys) StepDown() error {
|
||||
r := c.c.NewRequest("PUT", "/v1/sys/step-down")
|
||||
resp, err := c.c.RawRequest(r)
|
||||
if err == nil {
|
||||
defer resp.Body.Close()
|
||||
}
|
||||
return err
|
||||
}
|
|
@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
|
|||
}, nil
|
||||
},
|
||||
|
||||
"step-down": func() (cli.Command, error) {
|
||||
return &command.StepDownCommand{
|
||||
Meta: meta,
|
||||
}, nil
|
||||
},
|
||||
|
||||
"mount": func() (cli.Command, error) {
|
||||
return &command.MountCommand{
|
||||
Meta: meta,
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// StepDownCommand is a Command that forces the targeted Vault node to give up
// active duty. It does not seal the vault.
|
||||
type StepDownCommand struct {
|
||||
// Meta is embedded; the methods of this command use its FlagSet, Client
// and Ui members.
Meta
|
||||
}
|
||||
|
||||
func (c *StepDownCommand) Run(args []string) int {
|
||||
flags := c.Meta.FlagSet("step-down", FlagSetDefault)
|
||||
flags.Usage = func() { c.Ui.Error(c.Help()) }
|
||||
if err := flags.Parse(args); err != nil {
|
||||
return 1
|
||||
}
|
||||
|
||||
client, err := c.Client()
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf(
|
||||
"Error initializing client: %s", err))
|
||||
return 2
|
||||
}
|
||||
|
||||
if err := client.Sys().StepDown(); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error stepping down: %s", err))
|
||||
return 1
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *StepDownCommand) Synopsis() string {
|
||||
return "Force the Vault node to give up active duty"
|
||||
}
|
||||
|
||||
// Help returns the long-form usage text for the step-down command: the usage
// line, a description of what stepping down does (including the caveat that
// the same node may re-acquire the lock), followed by the output of
// generalOptionsUsage().
func (c *StepDownCommand) Help() string {
|
||||
helpText := `
|
||||
Usage: vault step-down [options]
|
||||

||||
Force the Vault node to step down from active duty.
|
||||

||||
This causes the indicated node to give up active status. Note that while the
|
||||
affected node will have a short delay before attempting to grab the lock
|
||||
again, if no other node grabs the lock beforehand, it is possible for the
|
||||
same node to re-grab the lock and become active again.
|
||||

||||
General Options:
|
||||

||||
` + generalOptionsUsage()
|
||||
// Strip the leading and trailing whitespace of the assembled text (the raw
// literal starts with a newline).
return strings.TrimSpace(helpText)
|
||||
}
|
|
@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler {
|
|||
mux.Handle("/v1/sys/init", handleSysInit(core))
|
||||
mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core))
|
||||
mux.Handle("/v1/sys/seal", handleSysSeal(core))
|
||||
mux.Handle("/v1/sys/step-down", handleSysStepDown(core))
|
||||
mux.Handle("/v1/sys/unseal", handleSysUnseal(core))
|
||||
mux.Handle("/v1/sys/renew/", handleLogical(core, false))
|
||||
mux.Handle("/v1/sys/leader", handleSysLeader(core))
|
||||
|
|
|
@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler {
|
|||
})
|
||||
}
|
||||
|
||||
func handleSysStepDown(core *vault.Core) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.Method {
|
||||
case "PUT":
|
||||
case "POST":
|
||||
default:
|
||||
respondError(w, http.StatusMethodNotAllowed, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// Get the auth for the request so we can access the token directly
|
||||
req := requestAuth(r, &logical.Request{})
|
||||
|
||||
// Seal with the token above
|
||||
if err := core.StepDown(req.ClientToken); err != nil {
|
||||
respondError(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
|
||||
respondOk(w, nil)
|
||||
})
|
||||
}
|
||||
|
||||
func handleSysUnseal(core *vault.Core) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.Method {
|
||||
|
|
|
@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) {
|
|||
httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil)
|
||||
testResponseStatus(t, httpResp, 204)
|
||||
}
|
||||
|
||||
func TestSysStepDown(t *testing.T) {
|
||||
core, _, token := vault.TestCoreUnsealed(t)
|
||||
ln, addr := TestServer(t, core)
|
||||
defer ln.Close()
|
||||
TestServerAuth(t, addr, token)
|
||||
|
||||
resp := testHttpPut(t, token, addr+"/v1/sys/step-down", nil)
|
||||
testResponseStatus(t, resp, 204)
|
||||
}
|
||||
|
|
110
vault/core.go
110
vault/core.go
|
@ -64,6 +64,10 @@ const (
|
|||
// leaderPrefixCleanDelay is how long to wait between deletions
|
||||
// of orphaned leader keys, to prevent slamming the backend.
|
||||
leaderPrefixCleanDelay = 200 * time.Millisecond
|
||||
|
||||
// manualStepDownSleepPeriod is how long to sleep after a user-initiated
|
||||
// step down of the active node, to prevent instantly regrabbing the lock
|
||||
manualStepDownSleepPeriod = 10 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -206,9 +210,10 @@ type Core struct {
|
|||
stateLock sync.RWMutex
|
||||
sealed bool
|
||||
|
||||
standby bool
|
||||
standbyDoneCh chan struct{}
|
||||
standbyStopCh chan struct{}
|
||||
standby bool
|
||||
standbyDoneCh chan struct{}
|
||||
standbyStopCh chan struct{}
|
||||
manualStepDownCh chan struct{}
|
||||
|
||||
// unlockParts has the keys provided to Unseal until
|
||||
// the threshold number of parts is available.
|
||||
|
@ -1149,7 +1154,8 @@ func (c *Core) Unseal(key []byte) (bool, error) {
|
|||
// Go to standby mode, wait until we are active to unseal
|
||||
c.standbyDoneCh = make(chan struct{})
|
||||
c.standbyStopCh = make(chan struct{})
|
||||
go c.runStandby(c.standbyDoneCh, c.standbyStopCh)
|
||||
c.manualStepDownCh = make(chan struct{})
|
||||
go c.runStandby(c.standbyDoneCh, c.standbyStopCh, c.manualStepDownCh)
|
||||
}
|
||||
|
||||
// Success!
|
||||
|
@ -1161,6 +1167,7 @@ func (c *Core) Unseal(key []byte) (bool, error) {
|
|||
// be unsealed again to perform any further operations.
|
||||
func (c *Core) Seal(token string) (retErr error) {
|
||||
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())
|
||||
|
||||
c.stateLock.Lock()
|
||||
defer c.stateLock.Unlock()
|
||||
if c.sealed {
|
||||
|
@ -1173,15 +1180,8 @@ func (c *Core) Seal(token string) (retErr error) {
|
|||
Path: "sys/seal",
|
||||
ClientToken: token,
|
||||
}
|
||||
acl, te, err := c.fetchACLandTokenEntry(req)
|
||||
|
||||
// Attempt to use the token (decrement num_uses)
|
||||
if te != nil {
|
||||
if err := c.tokenStore.UseToken(te); err != nil {
|
||||
c.logger.Printf("[ERR] core: failed to use token: %v", err)
|
||||
retErr = ErrInternalError
|
||||
}
|
||||
}
|
||||
acl, te, err := c.fetchACLandTokenEntry(req)
|
||||
if err != nil {
|
||||
// Since there is no token store in standby nodes, sealing cannot
|
||||
// be done. Ideally, the request has to be forwarded to leader node
|
||||
|
@ -1189,11 +1189,20 @@ func (c *Core) Seal(token string) (retErr error) {
|
|||
// just returning with an error and recommending a vault restart, which
|
||||
// essentially does the same thing.
|
||||
if c.standby {
|
||||
c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead")
|
||||
return errors.New("vault cannot be sealed when in standby mode; please restart instead")
|
||||
c.logger.Printf("[ERR] core: vault cannot seal when in standby mode; please restart instead")
|
||||
return errors.New("vault cannot seal when in standby mode; please restart instead")
|
||||
}
|
||||
return err
|
||||
}
|
||||
// Attempt to use the token (decrement num_uses)
|
||||
// If we can't, we still continue attempting the seal, so long as the token
|
||||
// has appropriate permissions
|
||||
if te != nil {
|
||||
if err := c.tokenStore.UseToken(te); err != nil {
|
||||
c.logger.Printf("[ERR] core: failed to use token: %v", err)
|
||||
retErr = ErrInternalError
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that this operation is allowed
|
||||
allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path)
|
||||
|
@ -1206,7 +1215,7 @@ func (c *Core) Seal(token string) (retErr error) {
|
|||
return logical.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// Seal the Vault
|
||||
//Seal the Vault
|
||||
err = c.sealInternal()
|
||||
if err == nil && retErr == ErrInternalError {
|
||||
c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation")
|
||||
|
@ -1217,9 +1226,60 @@ func (c *Core) Seal(token string) (retErr error) {
|
|||
return
|
||||
}
|
||||
|
||||
// sealInternal is an internal method used to seal the vault.
|
||||
// It does not do any authorization checking. The stateLock must
|
||||
// be held prior to calling.
|
||||
// StepDown is used to step down from leadership
|
||||
func (c *Core) StepDown(token string) error {
|
||||
defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())
|
||||
|
||||
c.stateLock.Lock()
|
||||
defer c.stateLock.Unlock()
|
||||
if c.sealed {
|
||||
return nil
|
||||
}
|
||||
if c.ha == nil || c.standby {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate the token is a root token
|
||||
req := &logical.Request{
|
||||
Operation: logical.UpdateOperation,
|
||||
Path: "sys/step-down",
|
||||
ClientToken: token,
|
||||
}
|
||||
|
||||
acl, te, err := c.fetchACLandTokenEntry(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Attempt to use the token (decrement num_uses)
|
||||
if te != nil {
|
||||
if err := c.tokenStore.UseToken(te); err != nil {
|
||||
c.logger.Printf("[ERR] core: failed to use token: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that this operation is allowed
|
||||
allowed, rootPrivs := acl.AllowOperation(req.Operation, req.Path)
|
||||
if !allowed {
|
||||
return logical.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// We always require root privileges for this operation
|
||||
if !rootPrivs {
|
||||
return logical.ErrPermissionDenied
|
||||
}
|
||||
|
||||
select {
|
||||
case c.manualStepDownCh <- struct{}{}:
|
||||
default:
|
||||
c.logger.Printf("[WARN] core: manual step-down operation already queued")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// sealInternal is an internal method used to seal the vault. It does not do
|
||||
// any authorization checking. The stateLock must be held prior to calling.
|
||||
func (c *Core) sealInternal() error {
|
||||
// Enable that we are sealed to prevent further transactions
|
||||
c.sealed = true
|
||||
|
@ -1244,6 +1304,7 @@ func (c *Core) sealInternal() error {
|
|||
return err
|
||||
}
|
||||
c.logger.Printf("[INFO] core: vault is sealed")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -1353,8 +1414,9 @@ func (c *Core) preSeal() error {
|
|||
// runStandby is a long running routine that is used when an HA backend
|
||||
// is enabled. It waits until we are leader and switches this Vault to
|
||||
// active.
|
||||
func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
|
||||
func (c *Core) runStandby(doneCh, stopCh, manualStepDownCh chan struct{}) {
|
||||
defer close(doneCh)
|
||||
defer close(manualStepDownCh)
|
||||
c.logger.Printf("[INFO] core: entering standby mode")
|
||||
|
||||
// Monitor for key rotation
|
||||
|
@ -1418,11 +1480,15 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
|
|||
}
|
||||
|
||||
// Monitor a loss of leadership
|
||||
var manualStepDown bool
|
||||
select {
|
||||
case <-leaderLostCh:
|
||||
c.logger.Printf("[WARN] core: leadership lost, stopping active operation")
|
||||
case <-stopCh:
|
||||
c.logger.Printf("[WARN] core: stopping active operation")
|
||||
case <-manualStepDownCh:
|
||||
c.logger.Printf("[WARN] core: stepping down from active operation to standby")
|
||||
manualStepDown = true
|
||||
}
|
||||
|
||||
// Clear ourself as leader
|
||||
|
@ -1443,6 +1509,12 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
|
|||
if preSealErr != nil {
|
||||
c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err)
|
||||
}
|
||||
|
||||
// If we've merely stepped down, we could instantly grab the lock
|
||||
// again. Give the other nodes a chance.
|
||||
if manualStepDown {
|
||||
time.Sleep(manualStepDownSleepPeriod)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1106,9 +1106,6 @@ func TestCore_Standby_Seal(t *testing.T) {
|
|||
// Wait for core to become active
|
||||
testWaitActive(t, core)
|
||||
|
||||
// Ensure that the original clean function has stopped running
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
// Check the leader is local
|
||||
isLeader, advertise, err := core.Leader()
|
||||
if err != nil {
|
||||
|
@ -1183,6 +1180,180 @@ func TestCore_Standby_Seal(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestCore_StepDown(t *testing.T) {
|
||||
// Create the first core and initialize it
|
||||
inm := physical.NewInmem()
|
||||
inmha := physical.NewInmemHA()
|
||||
advertiseOriginal := "http://127.0.0.1:8200"
|
||||
core, err := NewCore(&CoreConfig{
|
||||
Physical: inm,
|
||||
HAPhysical: inmha,
|
||||
AdvertiseAddr: advertiseOriginal,
|
||||
DisableMlock: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
key, root := TestCoreInit(t, core)
|
||||
if _, err := core.Unseal(TestKeyCopy(key)); err != nil {
|
||||
t.Fatalf("unseal err: %s", err)
|
||||
}
|
||||
|
||||
// Verify unsealed
|
||||
sealed, err := core.Sealed()
|
||||
if err != nil {
|
||||
t.Fatalf("err checking seal status: %s", err)
|
||||
}
|
||||
if sealed {
|
||||
t.Fatal("should not be sealed")
|
||||
}
|
||||
|
||||
// Wait for core to become active
|
||||
testWaitActive(t, core)
|
||||
|
||||
// Check the leader is local
|
||||
isLeader, advertise, err := core.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !isLeader {
|
||||
t.Fatalf("should be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
|
||||
// Create the second core and initialize it
|
||||
advertiseOriginal2 := "http://127.0.0.1:8500"
|
||||
core2, err := NewCore(&CoreConfig{
|
||||
Physical: inm,
|
||||
HAPhysical: inmha,
|
||||
AdvertiseAddr: advertiseOriginal2,
|
||||
DisableMlock: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if _, err := core2.Unseal(TestKeyCopy(key)); err != nil {
|
||||
t.Fatalf("unseal err: %s", err)
|
||||
}
|
||||
|
||||
// Verify unsealed
|
||||
sealed, err = core2.Sealed()
|
||||
if err != nil {
|
||||
t.Fatalf("err checking seal status: %s", err)
|
||||
}
|
||||
if sealed {
|
||||
t.Fatal("should not be sealed")
|
||||
}
|
||||
|
||||
// Core2 should be in standby
|
||||
standby, err := core2.Standby()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !standby {
|
||||
t.Fatalf("should be standby")
|
||||
}
|
||||
|
||||
// Check the leader is not local
|
||||
isLeader, advertise, err = core2.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if isLeader {
|
||||
t.Fatalf("should not be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
|
||||
// Step down core
|
||||
err = core.StepDown(root)
|
||||
if err != nil {
|
||||
t.Fatal("error stepping down core 1")
|
||||
}
|
||||
|
||||
// Give time to switch leaders
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
// Core1 should be in standby
|
||||
standby, err = core.Standby()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !standby {
|
||||
t.Fatalf("should be standby")
|
||||
}
|
||||
|
||||
// Check the leader is core2
|
||||
isLeader, advertise, err = core2.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !isLeader {
|
||||
t.Fatalf("should be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal2 {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
|
||||
// Check the leader is not local
|
||||
isLeader, advertise, err = core.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if isLeader {
|
||||
t.Fatalf("should not be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal2 {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
|
||||
// Step down core2
|
||||
err = core2.StepDown(root)
|
||||
if err != nil {
|
||||
t.Fatal("error stepping down core 1")
|
||||
}
|
||||
|
||||
// Give time to switch leaders -- core 1 will still be waiting on its
|
||||
// cooling off period so give it a full 10 seconds to recover
|
||||
time.Sleep(10 * time.Second)
|
||||
|
||||
// Core2 should be in standby
|
||||
standby, err = core2.Standby()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !standby {
|
||||
t.Fatalf("should be standby")
|
||||
}
|
||||
|
||||
// Check the leader is core1
|
||||
isLeader, advertise, err = core.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !isLeader {
|
||||
t.Fatalf("should be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
|
||||
// Check the leader is not local
|
||||
isLeader, advertise, err = core2.Leader()
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if isLeader {
|
||||
t.Fatalf("should not be leader")
|
||||
}
|
||||
if advertise != advertiseOriginal {
|
||||
t.Fatalf("Bad advertise: %v", advertise)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCore_CleanLeaderPrefix(t *testing.T) {
|
||||
// Create the first core and initialize it
|
||||
inm := physical.NewInmem()
|
||||
|
|
|
@ -11,7 +11,9 @@ description: |-
|
|||
<dl>
|
||||
<dt>Description</dt>
|
||||
<dd>
|
||||
Seals the Vault. In HA mode, only an active node can be sealed. Standby nodes should be restarted to get the same effect.
|
||||
Seals the Vault. In HA mode, only an active node can be sealed. Standby
|
||||
nodes should be restarted to get the same effect. Requires a token with
|
||||
`root` policy or `sudo` capability on the path.
|
||||
</dd>
|
||||
|
||||
<dt>Method</dt>
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
layout: "http"
|
||||
page_title: "HTTP API: /sys/step-down"
|
||||
sidebar_current: "docs-http-ha-step-down"
|
||||
description: |-
|
||||
The '/sys/step-down' endpoint causes the node to give up active status.
|
||||
---
|
||||
|
||||
# /sys/step-down
|
||||
|
||||
<dl>
|
||||
<dt>Description</dt>
|
||||
<dd>
|
||||
Forces the node to give up active status. If the node does not have active
|
||||
status, this endpoint does nothing. Note that the node will sleep for ten
|
||||
seconds before attempting to grab the active lock again, but if no standby
|
||||
nodes grab the active lock in the interim, the same node may become the
|
||||
active node again. Requires a token with `root` policy or `sudo` capability
|
||||
on the path.
|
||||
</dd>
|
||||
|
||||
<dt>Method</dt>
|
||||
<dd>PUT</dd>
|
||||
|
||||
<dt>Parameters</dt>
|
||||
<dd>
|
||||
None
|
||||
</dd>
|
||||
|
||||
<dt>Returns</dt>
|
||||
<dd>A `204` response code.
|
||||
</dd>
|
||||
</dl>
|
|
@ -107,6 +107,9 @@
|
|||
<li<%= sidebar_current("docs-http-ha-leader") %>>
|
||||
<a href="/docs/http/sys-leader.html">/sys/leader</a>
|
||||
</li>
|
||||
<li<%= sidebar_current("docs-http-ha-step-down") %>>
|
||||
<a href="/docs/http/sys-step-down.html">/sys/step-down</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
|
|
Loading…
Reference in New Issue