Provide 'sys/step-down' and 'vault step-down'

This endpoint causes the node that receives the request to step down from
active duty. It's a no-op if the node isn't active or isn't running in HA
mode. The node will wait one second before attempting to reacquire the lock,
to give other nodes a chance to grab it.

Fixes #1093
This commit is contained in:
Jeff Mitchell 2016-02-26 19:43:55 -05:00
parent d02d3124b5
commit 11ddd2290b
11 changed files with 374 additions and 15 deletions

10
api/sys_stepdown.go Normal file
View File

@ -0,0 +1,10 @@
package api
// StepDown issues a PUT to /v1/sys/step-down and returns any error reported
// by the underlying request. The response body is closed only when the
// request returned no error.
func (c *Sys) StepDown() error {
	req := c.c.NewRequest("PUT", "/v1/sys/step-down")

	resp, err := c.c.RawRequest(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	return nil
}

View File

@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
}, nil
},
"step-down": func() (cli.Command, error) {
return &command.StepDownCommand{
Meta: meta,
}, nil
},
"mount": func() (cli.Command, error) {
return &command.MountCommand{
Meta: meta,

54
command/step-down.go Normal file
View File

@ -0,0 +1,54 @@
package command
import (
"fmt"
"strings"
)
// StepDownCommand is a Command that forces the targeted Vault node to step
// down from active duty.
type StepDownCommand struct {
Meta
}
// Run executes the step-down command. It parses the command-line flags,
// builds an API client and asks the server to step down.
//
// Exit codes: 0 on success, 1 if flag parsing or the step-down request
// fails, 2 if the client cannot be initialized.
func (c *StepDownCommand) Run(args []string) int {
	flags := c.Meta.FlagSet("step-down", FlagSetDefault)
	flags.Usage = func() { c.Ui.Error(c.Help()) }
	if parseErr := flags.Parse(args); parseErr != nil {
		return 1
	}

	client, clientErr := c.Client()
	if clientErr != nil {
		msg := fmt.Sprintf(
			"Error initializing client: %s", clientErr)
		c.Ui.Error(msg)
		return 2
	}

	stepErr := client.Sys().StepDown()
	if stepErr != nil {
		msg := fmt.Sprintf("Error stepping down: %s", stepErr)
		c.Ui.Error(msg)
		return 1
	}

	return 0
}
// Synopsis returns the one-line description of the step-down command.
func (c *StepDownCommand) Synopsis() string {
return "Force the Vault node to give up active duty"
}
// Help returns the usage text for the step-down command: a fixed description
// followed by the output of generalOptionsUsage, with leading and trailing
// whitespace trimmed.
func (c *StepDownCommand) Help() string {
helpText := `
Usage: vault step-down [options]
Force the Vault node to step down from active duty.
This causes the indicated node to give up active status. Note that while the
affected node will have a short delay before attempting to grab the lock
again, if no other node grabs the lock beforehand, it is possible for the
same node to re-grab the lock and become active again.
General Options:
` + generalOptionsUsage()
return strings.TrimSpace(helpText)
}

View File

@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler {
mux.Handle("/v1/sys/init", handleSysInit(core))
mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core))
mux.Handle("/v1/sys/seal", handleSysSeal(core))
mux.Handle("/v1/sys/step-down", handleSysStepDown(core))
mux.Handle("/v1/sys/unseal", handleSysUnseal(core))
mux.Handle("/v1/sys/mounts", proxySysRequest(core))
mux.Handle("/v1/sys/mounts/", proxySysRequest(core))

View File

@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler {
})
}
// handleSysStepDown returns the HTTP handler for /v1/sys/step-down. It
// accepts PUT and POST; any other method gets a 405. The client token is
// taken from the request and passed to core.StepDown. A failure there is
// reported as a 500, otherwise the handler responds via respondOk.
func handleSysStepDown(core *vault.Core) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Reject anything other than PUT or POST
		if r.Method != "PUT" && r.Method != "POST" {
			respondError(w, http.StatusMethodNotAllowed, nil)
			return
		}

		// Get the auth for the request so we can access the token directly
		authReq := requestAuth(r, &logical.Request{})

		// Step down with the token above
		err := core.StepDown(authReq.ClientToken)
		if err != nil {
			respondError(w, http.StatusInternalServerError, err)
			return
		}

		respondOk(w, nil)
	})
}
func handleSysUnseal(core *vault.Core) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.Method {

View File

@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) {
httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil)
testResponseStatus(t, httpResp, 204)
}
// TestSysStepDown checks that a PUT to /v1/sys/step-down against an unsealed
// test core answers with a 204.
func TestSysStepDown(t *testing.T) {
	core, _, tok := vault.TestCoreUnsealed(t)

	listener, addr := TestServer(t, core)
	defer listener.Close()
	TestServerAuth(t, addr, tok)

	stepDownURL := addr + "/v1/sys/step-down"
	httpResp := testHttpPut(t, tok, stepDownURL, nil)
	testResponseStatus(t, httpResp, 204)
}

View File

@ -1157,22 +1157,45 @@ func (c *Core) Unseal(key []byte) (bool, error) {
return true, nil
}
// Seal is used to re-seal the Vault. This requires the Vault to
// be unsealed again to perform any further operations.
func (c *Core) Seal(token string) (retErr error) {
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())
// Seal is used to seal the Vault. It delegates to stepDownAndSeal with seal
// set to true, passing the given token through for validation there, and
// returns whatever error that call reports.
func (c *Core) Seal(token string) error {
return c.stepDownAndSeal(token, true)
}
// StepDown is used to step down from leadership without sealing. It
// delegates to stepDownAndSeal with seal set to false, passing the given
// token through for validation there. That call returns nil without doing
// anything when the core is sealed, has no HA backend (c.ha == nil), or is
// already in standby.
func (c *Core) StepDown(token string) error {
return c.stepDownAndSeal(token, false)
}
// stepDownAndSeal is used to step down from leadership and, optionally,
// re-seal the Vault. If sealed, this requires the Vault to be unsealed again
// to perform any further operations.
func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) {
if seal {
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())
} else {
defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())
}
c.stateLock.Lock()
defer c.stateLock.Unlock()
if c.sealed {
return nil
}
if !seal && (c.ha == nil || c.standby) {
return nil
}
// Validate the token is a root token
req := &logical.Request{
Operation: logical.UpdateOperation,
Path: "sys/seal",
ClientToken: token,
}
if seal {
req.Path = "sys/seal"
} else {
req.Path = "sys/step-down"
}
acl, te, err := c.fetchACLandTokenEntry(req)
// Attempt to use the token (decrement num_uses)
@ -1189,8 +1212,8 @@ func (c *Core) Seal(token string) (retErr error) {
// just returning with an error and recommending a vault restart, which
// essentially does the same thing.
if c.standby {
c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead")
return errors.New("vault cannot be sealed when in standby mode; please restart instead")
c.logger.Printf("[ERR] core: vault cannot step down or be sealed when in standby mode; please restart instead")
return errors.New("vault cannot step down or be sealed when in standby mode; please restart instead")
}
return err
}
@ -1207,19 +1230,22 @@ func (c *Core) Seal(token string) (retErr error) {
}
// Seal the Vault
err = c.sealInternal()
if err == nil && retErr == ErrInternalError {
c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation")
if seal {
err = c.sealInternal()
if err == nil && retErr == ErrInternalError {
c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation")
} else {
retErr = err
}
} else {
retErr = err
c.stepDownInternal()
}
return
}
// sealInternal is an internal method used to seal the vault.
// It does not do any authorization checking. The stateLock must
// be held prior to calling.
// sealInternal is an internal method used to seal the vault. It does not do
// any authorization checking. The stateLock must be held prior to calling.
func (c *Core) sealInternal() error {
// Enable that we are sealed to prevent further transactions
c.sealed = true
@ -1244,9 +1270,20 @@ func (c *Core) sealInternal() error {
return err
}
c.logger.Printf("[INFO] core: vault is sealed")
return nil
}
// stepDownInternal is an internal method used to step down from active duty.
// It does not do any authorization checking. Its caller, stepDownAndSeal,
// invokes it while holding stateLock.
//
// NOTE(review): this is a plain (blocking) channel send; whether
// standbyStopCh is buffered is not visible here. If nothing is receiving,
// the call blocks with stateLock still held by the caller — confirm the
// standby loop is always ready to receive when this runs.
func (c *Core) stepDownInternal() {
// Merely trigger the loop to re-run. This value will cause the
// loop to run through giving up leadership, but without triggering
// the return at the end of the next loop run, since it's not
// closed
c.standbyStopCh <- struct{}{}
}
// postUnseal is invoked after the barrier is unsealed, but before
// allowing any user operations. This allows us to setup any state that
// requires the Vault to be unsealed such as mount tables, logical backends,
@ -1443,6 +1480,10 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) {
if preSealErr != nil {
c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err)
}
// If we've merely stepped down, we could instantly grab the lock
// again. Give the other nodes a chance.
time.Sleep(time.Second)
}
}

View File

@ -1183,6 +1183,182 @@ func TestCore_Standby_Seal(t *testing.T) {
}
}
// TestCore_StepDown verifies leadership hand-off between two cores that share
// the same in-memory physical and HA backends. It brings up core as the
// active node and core2 as a standby, steps core down and expects core2 to
// become leader, then steps core2 down and expects core to become leader
// again.
func TestCore_StepDown(t *testing.T) {
	// Create the first core and initialize it
	inm := physical.NewInmem()
	inmha := physical.NewInmemHA()
	advertiseOriginal := "http://127.0.0.1:8200"
	core, err := NewCore(&CoreConfig{
		Physical:      inm,
		HAPhysical:    inmha,
		AdvertiseAddr: advertiseOriginal,
		DisableMlock:  true,
	})
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	key, root := TestCoreInit(t, core)
	if _, err := core.Unseal(TestKeyCopy(key)); err != nil {
		t.Fatalf("unseal err: %s", err)
	}

	// Verify unsealed
	sealed, err := core.Sealed()
	if err != nil {
		t.Fatalf("err checking seal status: %s", err)
	}
	if sealed {
		t.Fatal("should not be sealed")
	}

	// Wait for core to become active
	testWaitActive(t, core)

	// Ensure that the original clean function has stopped running
	time.Sleep(2 * time.Second)

	// Check the leader is local
	isLeader, advertise, err := core.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !isLeader {
		t.Fatalf("should be leader")
	}
	if advertise != advertiseOriginal {
		t.Fatalf("Bad advertise: %v", advertise)
	}

	// Create the second core and initialize it
	advertiseOriginal2 := "http://127.0.0.1:8500"
	core2, err := NewCore(&CoreConfig{
		Physical:      inm,
		HAPhysical:    inmha,
		AdvertiseAddr: advertiseOriginal2,
		DisableMlock:  true,
	})
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if _, err := core2.Unseal(TestKeyCopy(key)); err != nil {
		t.Fatalf("unseal err: %s", err)
	}

	// Verify unsealed
	sealed, err = core2.Sealed()
	if err != nil {
		t.Fatalf("err checking seal status: %s", err)
	}
	if sealed {
		t.Fatal("should not be sealed")
	}

	// Core2 should be in standby
	standby, err := core2.Standby()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !standby {
		t.Fatalf("should be standby")
	}

	// Check the leader is not local
	isLeader, advertise, err = core2.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if isLeader {
		t.Fatalf("should not be leader")
	}
	if advertise != advertiseOriginal {
		t.Fatalf("Bad advertise: %v", advertise)
	}

	// Step down core
	err = core.StepDown(root)
	if err != nil {
		// Include the underlying error so a failure is diagnosable
		t.Fatalf("error stepping down core 1: %v", err)
	}

	// Give time to switch leaders
	time.Sleep(2 * time.Second)

	// Core1 should be in standby
	standby, err = core.Standby()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !standby {
		t.Fatalf("should be standby")
	}

	// Check the leader is core2
	isLeader, advertise, err = core2.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !isLeader {
		t.Fatalf("should be leader")
	}
	if advertise != advertiseOriginal2 {
		t.Fatalf("Bad advertise: %v", advertise)
	}

	// Check the leader is not local
	isLeader, advertise, err = core.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if isLeader {
		t.Fatalf("should not be leader")
	}
	if advertise != advertiseOriginal2 {
		t.Fatalf("Bad advertise: %v", advertise)
	}

	// Step down core2
	err = core2.StepDown(root)
	if err != nil {
		// This is core2; the message previously named core 1 by mistake
		t.Fatalf("error stepping down core 2: %v", err)
	}

	// Give time to switch leaders
	time.Sleep(2 * time.Second)

	// Core2 should be in standby
	standby, err = core2.Standby()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !standby {
		t.Fatalf("should be standby")
	}

	// Check the leader is core1
	isLeader, advertise, err = core.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !isLeader {
		t.Fatalf("should be leader")
	}
	if advertise != advertiseOriginal {
		t.Fatalf("Bad advertise: %v", advertise)
	}

	// Check the leader is not local
	isLeader, advertise, err = core2.Leader()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if isLeader {
		t.Fatalf("should not be leader")
	}
	if advertise != advertiseOriginal {
		t.Fatalf("Bad advertise: %v", advertise)
	}
}
func TestCore_CleanLeaderPrefix(t *testing.T) {
// Create the first core and initialize it
inm := physical.NewInmem()

View File

@ -11,7 +11,9 @@ description: |-
<dl>
<dt>Description</dt>
<dd>
Seals the Vault. In HA mode, only an active node can be sealed. Standby nodes should be restarted to get the same effect.
Seals the Vault. In HA mode, only an active node can be sealed. Standby
nodes should be restarted to get the same effect. Requires a token with
`root` policy or `sudo` capability on the path.
</dd>
<dt>Method</dt>

View File

@ -0,0 +1,33 @@
---
layout: "http"
page_title: "HTTP API: /sys/step-down"
sidebar_current: "docs-http-ha-step-down"
description: |-
The '/sys/step-down' endpoint causes the node to give up active status.
---
# /sys/step-down
<dl>
<dt>Description</dt>
<dd>
Forces the node to give up active status. If the node does not have active
status, this endpoint does nothing. Note that the node will sleep for a
second before attempting to grab the active lock again, but if no standby
nodes grab the active lock in the interim, the same node may become the
active node again. Requires a token with `root` policy or `sudo` capability
on the path.
</dd>
<dt>Method</dt>
<dd>PUT</dd>
<dt>Parameters</dt>
<dd>
None
</dd>
<dt>Returns</dt>
<dd>A `204` response code.
</dd>
</dl>

View File

@ -107,6 +107,9 @@
<li<%= sidebar_current("docs-http-ha-leader") %>>
<a href="/docs/http/sys-leader.html">/sys/leader</a>
</li>
<li<%= sidebar_current("docs-http-ha-step-down") %>>
<a href="/docs/http/sys-step-down.html">/sys/step-down</a>
</li>
</ul>
</li>