From 11ddd2290bbdebc8f46c91d68811ba0dcebfe878 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 26 Feb 2016 19:43:55 -0500 Subject: [PATCH] Provide 'sys/step-down' and 'vault step-down' This endpoint causes the node that receives the request to step down from active duty. It's a no-op if the node isn't active or isn't running in HA mode. The node will wait one second before attempting to reacquire the lock, to give other nodes a chance to grab it. Fixes #1093 --- api/sys_stepdown.go | 10 + cli/commands.go | 6 + command/step-down.go | 54 ++++++ http/handler.go | 1 + http/sys_seal.go | 23 +++ http/sys_seal_test.go | 10 + vault/core.go | 69 +++++-- vault/core_test.go | 176 ++++++++++++++++++ website/source/docs/http/sys-seal.html.md | 4 +- .../source/docs/http/sys-step-down.html.md | 33 ++++ website/source/layouts/http.erb | 3 + 11 files changed, 374 insertions(+), 15 deletions(-) create mode 100644 api/sys_stepdown.go create mode 100644 command/step-down.go create mode 100644 website/source/docs/http/sys-step-down.html.md diff --git a/api/sys_stepdown.go b/api/sys_stepdown.go new file mode 100644 index 000000000..421e5f19f --- /dev/null +++ b/api/sys_stepdown.go @@ -0,0 +1,10 @@ +package api + +func (c *Sys) StepDown() error { + r := c.c.NewRequest("PUT", "/v1/sys/step-down") + resp, err := c.c.RawRequest(r) + if err == nil { + defer resp.Body.Close() + } + return err +} diff --git a/cli/commands.go b/cli/commands.go index 05f5c7479..1f5b89f91 100644 --- a/cli/commands.go +++ b/cli/commands.go @@ -224,6 +224,12 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { }, nil }, + "step-down": func() (cli.Command, error) { + return &command.StepDownCommand{ + Meta: meta, + }, nil + }, + "mount": func() (cli.Command, error) { return &command.MountCommand{ Meta: meta, diff --git a/command/step-down.go b/command/step-down.go new file mode 100644 index 000000000..1f2448e56 --- /dev/null +++ b/command/step-down.go @@ -0,0 +1,54 @@ +package command + +import ( + "fmt" + 
"strings" +) + +// StepDownCommand is a Command that forces the node to give up active status. +type StepDownCommand struct { + Meta +} + +func (c *StepDownCommand) Run(args []string) int { + flags := c.Meta.FlagSet("step-down", FlagSetDefault) + flags.Usage = func() { c.Ui.Error(c.Help()) } + if err := flags.Parse(args); err != nil { + return 1 + } + + client, err := c.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf( + "Error initializing client: %s", err)) + return 2 + } + + if err := client.Sys().StepDown(); err != nil { + c.Ui.Error(fmt.Sprintf("Error stepping down: %s", err)) + return 1 + } + + return 0 +} + +func (c *StepDownCommand) Synopsis() string { + return "Force the Vault node to give up active duty" +} + +func (c *StepDownCommand) Help() string { + helpText := ` +Usage: vault step-down [options] + + Force the Vault node to step down from active duty. + + This causes the indicated node to give up active status. Note that while the + affected node will have a short delay before attempting to grab the lock + again, if no other node grabs the lock beforehand, it is possible for the + same node to re-grab the lock and become active again. 
+ +General Options: + + ` + generalOptionsUsage() + return strings.TrimSpace(helpText) +} diff --git a/http/handler.go b/http/handler.go index bd2f2dafc..5508a9539 100644 --- a/http/handler.go +++ b/http/handler.go @@ -23,6 +23,7 @@ func Handler(core *vault.Core) http.Handler { mux.Handle("/v1/sys/init", handleSysInit(core)) mux.Handle("/v1/sys/seal-status", handleSysSealStatus(core)) mux.Handle("/v1/sys/seal", handleSysSeal(core)) + mux.Handle("/v1/sys/step-down", handleSysStepDown(core)) mux.Handle("/v1/sys/unseal", handleSysUnseal(core)) mux.Handle("/v1/sys/mounts", proxySysRequest(core)) mux.Handle("/v1/sys/mounts/", proxySysRequest(core)) diff --git a/http/sys_seal.go b/http/sys_seal.go index d5ac76624..a11a2078b 100644 --- a/http/sys_seal.go +++ b/http/sys_seal.go @@ -34,6 +34,29 @@ func handleSysSeal(core *vault.Core) http.Handler { }) } +func handleSysStepDown(core *vault.Core) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case "PUT": + case "POST": + default: + respondError(w, http.StatusMethodNotAllowed, nil) + return + } + + // Get the auth for the request so we can access the token directly + req := requestAuth(r, &logical.Request{}) + + // Step down with the token above + if err := core.StepDown(req.ClientToken); err != nil { + respondError(w, http.StatusInternalServerError, err) + return + } + + respondOk(w, nil) + }) +} + func handleSysUnseal(core *vault.Core) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.Method { diff --git a/http/sys_seal_test.go b/http/sys_seal_test.go index 4b3008276..e1cca89a6 100644 --- a/http/sys_seal_test.go +++ b/http/sys_seal_test.go @@ -304,3 +304,13 @@ func TestSysSeal_Permissions(t *testing.T) { httpResp = testHttpPut(t, "child", addr+"/v1/sys/seal", nil) testResponseStatus(t, httpResp, 204) } + +func TestSysStepDown(t *testing.T) { + core, _, token := vault.TestCoreUnsealed(t) + ln, addr := TestServer(t, core) + 
defer ln.Close() + TestServerAuth(t, addr, token) + + resp := testHttpPut(t, token, addr+"/v1/sys/step-down", nil) + testResponseStatus(t, resp, 204) +} diff --git a/vault/core.go b/vault/core.go index 75df72601..fa5405843 100644 --- a/vault/core.go +++ b/vault/core.go @@ -1157,22 +1157,45 @@ func (c *Core) Unseal(key []byte) (bool, error) { return true, nil } -// Seal is used to re-seal the Vault. This requires the Vault to -// be unsealed again to perform any further operations. -func (c *Core) Seal(token string) (retErr error) { - defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) +// Seal is used to seal the vault +func (c *Core) Seal(token string) error { + return c.stepDownAndSeal(token, true) +} + +// StepDown is used to step down from leadership +func (c *Core) StepDown(token string) error { + return c.stepDownAndSeal(token, false) +} + +// stepDownAndSeal is used to step down from leadership and, optionally, +// re-seal the Vault. If sealed, this requires the Vault to be unsealed again +// to perform any further operations. +func (c *Core) stepDownAndSeal(token string, seal bool) (retErr error) { + if seal { + defer metrics.MeasureSince([]string{"core", "seal"}, time.Now()) + } else { + defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now()) + } + c.stateLock.Lock() defer c.stateLock.Unlock() if c.sealed { return nil } + if !seal && (c.ha == nil || c.standby) { + return nil + } // Validate the token is a root token req := &logical.Request{ Operation: logical.UpdateOperation, - Path: "sys/seal", ClientToken: token, } + if seal { + req.Path = "sys/seal" + } else { + req.Path = "sys/step-down" + } acl, te, err := c.fetchACLandTokenEntry(req) // Attempt to use the token (decrement num_uses) @@ -1189,8 +1212,8 @@ func (c *Core) Seal(token string) (retErr error) { // just returning with an error and recommending a vault restart, which // essentially does the same thing. 
if c.standby { - c.logger.Printf("[ERR] core: vault cannot be sealed when in standby mode; please restart instead") - return errors.New("vault cannot be sealed when in standby mode; please restart instead") + c.logger.Printf("[ERR] core: vault cannot step down or be sealed when in standby mode; please restart instead") + return errors.New("vault cannot step down or be sealed when in standby mode; please restart instead") } return err } @@ -1207,19 +1230,22 @@ func (c *Core) Seal(token string) (retErr error) { } // Seal the Vault - err = c.sealInternal() - if err == nil && retErr == ErrInternalError { - c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") + if seal { + err = c.sealInternal() + if err == nil && retErr == ErrInternalError { + c.logger.Printf("[ERR] core: core is successfully sealed but another error occurred during the operation") + } else { + retErr = err + } } else { - retErr = err + c.stepDownInternal() } return } -// sealInternal is an internal method used to seal the vault. -// It does not do any authorization checking. The stateLock must -// be held prior to calling. +// sealInternal is an internal method used to seal the vault. It does not do +// any authorization checking. The stateLock must be held prior to calling. func (c *Core) sealInternal() error { // Enable that we are sealed to prevent furthur transactions c.sealed = true @@ -1244,9 +1270,20 @@ func (c *Core) sealInternal() error { return err } c.logger.Printf("[INFO] core: vault is sealed") + return nil } +// stepDownInternal is an internal method used to step down from active duty. +// It does not do any authorization checking. +func (c *Core) stepDownInternal() { + // Merely trigger the loop to re-run. 
This value will cause the + // loop to run through giving up leadership, but without triggering + // the return at the end of the next loop run, since it's not + // closed + c.standbyStopCh <- struct{}{} +} + // postUnseal is invoked after the barrier is unsealed, but before // allowing any user operations. This allows us to setup any state that // requires the Vault to be unsealed such as mount tables, logical backends, @@ -1443,6 +1480,10 @@ func (c *Core) runStandby(doneCh, stopCh chan struct{}) { if preSealErr != nil { c.logger.Printf("[ERR] core: pre-seal teardown failed: %v", err) } + + // If we've merely stepped down, we could instantly grab the lock + // again. Give the other nodes a chance. + time.Sleep(time.Second) } } diff --git a/vault/core_test.go b/vault/core_test.go index 1f9f80bd0..e597d767e 100644 --- a/vault/core_test.go +++ b/vault/core_test.go @@ -1183,6 +1183,182 @@ func TestCore_Standby_Seal(t *testing.T) { } } +func TestCore_StepDown(t *testing.T) { + // Create the first core and initialize it + inm := physical.NewInmem() + inmha := physical.NewInmemHA() + advertiseOriginal := "http://127.0.0.1:8200" + core, err := NewCore(&CoreConfig{ + Physical: inm, + HAPhysical: inmha, + AdvertiseAddr: advertiseOriginal, + DisableMlock: true, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + key, root := TestCoreInit(t, core) + if _, err := core.Unseal(TestKeyCopy(key)); err != nil { + t.Fatalf("unseal err: %s", err) + } + + // Verify unsealed + sealed, err := core.Sealed() + if err != nil { + t.Fatalf("err checking seal status: %s", err) + } + if sealed { + t.Fatal("should not be sealed") + } + + // Wait for core to become active + testWaitActive(t, core) + + // Ensure that the original clean function has stopped running + time.Sleep(2 * time.Second) + + // Check the leader is local + isLeader, advertise, err := core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise != 
advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Create the second core and initialize it + advertiseOriginal2 := "http://127.0.0.1:8500" + core2, err := NewCore(&CoreConfig{ + Physical: inm, + HAPhysical: inmha, + AdvertiseAddr: advertiseOriginal2, + DisableMlock: true, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + if _, err := core2.Unseal(TestKeyCopy(key)); err != nil { + t.Fatalf("unseal err: %s", err) + } + + // Verify unsealed + sealed, err = core2.Sealed() + if err != nil { + t.Fatalf("err checking seal status: %s", err) + } + if sealed { + t.Fatal("should not be sealed") + } + + // Core2 should be in standby + standby, err := core2.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is not local + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Step down core + err = core.StepDown(root) + if err != nil { + t.Fatal("error stepping down core 1") + } + + // Give time to switch leaders + time.Sleep(2 * time.Second) + + // Core1 should be in standby + standby, err = core.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is core2 + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise != advertiseOriginal2 { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Check the leader is not local + isLeader, advertise, err = core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal2 { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Step down core2 + err = core2.StepDown(root) + if 
err != nil { + t.Fatal("error stepping down core 1") + } + + // Give time to switch leaders + time.Sleep(2 * time.Second) + + // Core2 should be in standby + standby, err = core2.Standby() + if err != nil { + t.Fatalf("err: %v", err) + } + if !standby { + t.Fatalf("should be standby") + } + + // Check the leader is core1 + isLeader, advertise, err = core.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if !isLeader { + t.Fatalf("should be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } + + // Check the leader is not local + isLeader, advertise, err = core2.Leader() + if err != nil { + t.Fatalf("err: %v", err) + } + if isLeader { + t.Fatalf("should not be leader") + } + if advertise != advertiseOriginal { + t.Fatalf("Bad advertise: %v", advertise) + } +} + func TestCore_CleanLeaderPrefix(t *testing.T) { // Create the first core and initialize it inm := physical.NewInmem() diff --git a/website/source/docs/http/sys-seal.html.md b/website/source/docs/http/sys-seal.html.md index 55d5a81a9..d82b9af38 100644 --- a/website/source/docs/http/sys-seal.html.md +++ b/website/source/docs/http/sys-seal.html.md @@ -11,7 +11,9 @@ description: |-
Description
- Seals the Vault. In HA mode, only an active node can be sealed. Standby nodes should be restarted to get the same effect. + Seals the Vault. In HA mode, only an active node can be sealed. Standby + nodes should be restarted to get the same effect. Requires a token with + `root` policy or `sudo` capability on the path.
Method
diff --git a/website/source/docs/http/sys-step-down.html.md b/website/source/docs/http/sys-step-down.html.md new file mode 100644 index 000000000..94f5aa4c2 --- /dev/null +++ b/website/source/docs/http/sys-step-down.html.md @@ -0,0 +1,33 @@ +--- +layout: "http" +page_title: "HTTP API: /sys/step-down" +sidebar_current: "docs-http-ha-step-down" +description: |- + The '/sys/step-down' endpoint causes the node to give up active status. +--- + +# /sys/step-down + +
+
Description
+
+ Forces the node to give up active status. If the node does not have active + status, this endpoint does nothing. Note that the node will sleep for a + second before attempting to grab the active lock again, but if no standby + nodes grab the active lock in the interim, the same node may become the + active node again. Requires a token with `root` policy or `sudo` capability + on the path. +
+ +
Method
+
PUT
+ +
Parameters
+
+ None +
+ +
Returns
+
A `204` response code. +
+
diff --git a/website/source/layouts/http.erb b/website/source/layouts/http.erb index ac69d5651..ee75b37f9 100644 --- a/website/source/layouts/http.erb +++ b/website/source/layouts/http.erb @@ -107,6 +107,9 @@ > /sys/leader + > + /sys/step-down +