command/lock: Add -child-exitcode, return 2 on child error (#3329)

* Exit 2 if -child-exit-code and the child returned with an error. * There is no platform independent way to check the exact return code of * the child, so on error always return 2. * Closes #947 * Closes #1503
2017-07-26 22:09:19 -07:00 · 2017-07-26 22:09:19 -07:00 · 9b70009ab8
parent 005c9ad12a
commit 9b70009ab8
3 changed files with 64 additions and 14 deletions
--- a/command/lock.go
+++ b/command/lock.go
@ -41,7 +41,6 @@ type LockCommand struct {
 	child     *os.Process
 	childLock sync.Mutex
 	verbose   bool
 }
@ -75,14 +74,18 @@ func (c *LockCommand) Run(args []string) int {
 }
 func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 	var childDone chan struct{}
 	var limit int
 	var monitorRetry int
 	var name string
 	var passStdin bool
 	var propagateChildCode bool
 	var timeout time.Duration
 	f := c.BaseCommand.NewFlagSet(c)
 	f.BoolVar(&propagateChildCode, "child-exit-code", false,
 		"Exit 2 if the child process exited with an error if this is true, "+
 			"otherwise this doesn't propagate an error from the child. The "+
 			"default value is false.")
 	f.IntVar(&limit, "n", 1,
 		"Optional limit on the number of concurrent lock holders. The underlying "+
 			"implementation switches from a lock to a semaphore when the value is "+
@ -100,7 +103,7 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 		"Pass stdin to the child process.")
 	f.DurationVar(&timeout, "timeout", 0,
 		"Maximum amount of time to wait to acquire the lock, specified as a "+
-			"timestamp like \"1s\" or \"3h\". The default value is 0.")
+			"duration like \"1s\" or \"3h\". The default value is 0.")
 	f.BoolVar(&c.verbose, "verbose", false,
 		"Enable verbose (debugging) output.")
@ -185,6 +188,8 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 	}
 	// Check if we were shutdown but managed to still acquire the lock
 	var childCode int
 	var childErr chan error
 	select {
 	case <-c.ShutdownCh:
 		c.UI.Error("Shutdown triggered during lock acquisition")
@ -193,11 +198,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 	}
 	// Start the child process
-	childDone = make(chan struct{})
+	childErr = make(chan error, 1)
 	go func() {
-		if err := c.startChild(script, childDone, passStdin); err != nil {
+		childErr <- c.startChild(script, passStdin)
 			c.UI.Error(fmt.Sprintf("%s", err))
 		}
 	}()
 	// Monitor for shutdown, child termination, or lock loss
@ -210,7 +213,10 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 		if c.verbose {
 			c.UI.Info("Lock lost, killing child")
 		}
-	case <-childDone:
+	case err := <-childErr:
 		if err != nil {
 			childCode = 2
 		}
 		if c.verbose {
 			c.UI.Info("Child terminated, releasing lock")
 		}
@ -220,8 +226,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
 	// Prevent starting a new child.  The lock is never released
 	// after this point.
 	c.childLock.Lock()
 	// Kill any existing child
-	if err := c.killChild(childDone); err != nil {
+	if err := c.killChild(childErr); err != nil {
 		c.UI.Error(fmt.Sprintf("%s", err))
 	}
@ -243,6 +250,13 @@ RELEASE:
 	} else if c.verbose {
 		c.UI.Info("Cleanup succeeded")
 	}
 	// If we detected an error from the child process then we propagate
 	// that.
 	if propagateChildCode {
 		return childCode
 	}
 	return 0
 }
@ -321,8 +335,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name
 // startChild is a long running routine used to start and
 // wait for the child process to exit.
-func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin bool) error {
+func (c *LockCommand) startChild(script string, passStdin bool) error {
 	defer close(doneCh)
 	if c.verbose {
 		c.UI.Info(fmt.Sprintf("Starting handler '%s'", script))
 	}
@ -373,7 +386,7 @@ func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin
 // termination.
 // On Windows, the child is always hard terminated with a SIGKILL, even
 // on the first attempt.
-func (c *LockCommand) killChild(childDone chan struct{}) error {
+func (c *LockCommand) killChild(childErr chan error) error {
 	// Get the child process
 	child := c.child
@ -395,7 +408,7 @@ func (c *LockCommand) killChild(childDone chan struct{}) error {
 	// Wait for termination, or until a timeout
 	select {
-	case <-childDone:
+	case <-childErr:
 		if c.verbose {
 			c.UI.Info("Child terminated")
 		}
--- a/command/lock_test.go
+++ b/command/lock_test.go
@ -257,3 +257,33 @@ func TestLockCommand_MonitorRetry_Semaphore_Arg(t *testing.T) {
 		t.Fatalf("bad: %#v", opts)
 	}
 }
 func TestLockCommand_ChildExitCode(t *testing.T) {
 	t.Parallel()
 	a := agent.NewTestAgent(t.Name(), nil)
 	defer a.Shutdown()
 	t.Run("clean exit", func(t *testing.T) {
 		_, c := testLockCommand(t)
 		args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 0"}
 		if got, want := c.Run(args), 0; got != want {
 			t.Fatalf("got %d want %d", got, want)
 		}
 	})
 	t.Run("error exit", func(t *testing.T) {
 		_, c := testLockCommand(t)
 		args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 1"}
 		if got, want := c.Run(args), 2; got != want {
 			t.Fatalf("got %d want %d", got, want)
 		}
 	})
 	t.Run("not propagated", func(t *testing.T) {
 		_, c := testLockCommand(t)
 		args := []string{"-http-addr=" + a.HTTPAddr(), "test/prefix", "exit 1"}
 		if got, want := c.Run(args), 0; got != want {
 			t.Fatalf("got %d want %d", got, want)
 		}
 	})
 }
--- a/website/source/docs/commands/lock.html.markdown.erb
+++ b/website/source/docs/commands/lock.html.markdown.erb
@ -52,6 +52,10 @@ Windows has no POSIX compatible notion for `SIGTERM`.
 #### Command Options
 * `-child-exit-code` - Exit 2 if the child process exited with an error
  if this is true, otherwise this doesn't propagate an error from the
  child. The default value is false.
 * `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error
   while monitoring the lock. This allows riding out brief periods of unavailability
   without causing leader elections, but increases the amount of time required
@ -67,6 +71,9 @@ Windows has no POSIX compatible notion for `SIGTERM`.
 * `-pass-stdin` - Pass stdin to child process.
 * `timeout` - Maximum amount of time to wait to acquire the lock, specified
   as a duration like `1s` or `3h`. The default value is 0.
 * `-try` - Attempt to acquire the lock up to the given timeout. The timeout is a
  positive decimal number, with unit suffix, such as "500ms". Valid time units
  are "ns", "us" (or "µs"), "ms", "s", "m", "h".