command/lock: Add -child-exitcode, return 2 on child error (#3329)
* Exit 2 if -child-exit-code and the child returned with an error. * There is no platform independent way to check the exact return code of * the child, so on error always return 2. * Closes #947 * Closes #1503
This commit is contained in:
parent
005c9ad12a
commit
9b70009ab8
|
@ -41,7 +41,6 @@ type LockCommand struct {
|
||||||
|
|
||||||
child *os.Process
|
child *os.Process
|
||||||
childLock sync.Mutex
|
childLock sync.Mutex
|
||||||
|
|
||||||
verbose bool
|
verbose bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,14 +74,18 @@ func (c *LockCommand) Run(args []string) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
var childDone chan struct{}
|
|
||||||
var limit int
|
var limit int
|
||||||
var monitorRetry int
|
var monitorRetry int
|
||||||
var name string
|
var name string
|
||||||
var passStdin bool
|
var passStdin bool
|
||||||
|
var propagateChildCode bool
|
||||||
var timeout time.Duration
|
var timeout time.Duration
|
||||||
|
|
||||||
f := c.BaseCommand.NewFlagSet(c)
|
f := c.BaseCommand.NewFlagSet(c)
|
||||||
|
f.BoolVar(&propagateChildCode, "child-exit-code", false,
|
||||||
|
"Exit 2 if the child process exited with an error if this is true, "+
|
||||||
|
"otherwise this doesn't propagate an error from the child. The "+
|
||||||
|
"default value is false.")
|
||||||
f.IntVar(&limit, "n", 1,
|
f.IntVar(&limit, "n", 1,
|
||||||
"Optional limit on the number of concurrent lock holders. The underlying "+
|
"Optional limit on the number of concurrent lock holders. The underlying "+
|
||||||
"implementation switches from a lock to a semaphore when the value is "+
|
"implementation switches from a lock to a semaphore when the value is "+
|
||||||
|
@ -100,7 +103,7 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
"Pass stdin to the child process.")
|
"Pass stdin to the child process.")
|
||||||
f.DurationVar(&timeout, "timeout", 0,
|
f.DurationVar(&timeout, "timeout", 0,
|
||||||
"Maximum amount of time to wait to acquire the lock, specified as a "+
|
"Maximum amount of time to wait to acquire the lock, specified as a "+
|
||||||
"timestamp like \"1s\" or \"3h\". The default value is 0.")
|
"duration like \"1s\" or \"3h\". The default value is 0.")
|
||||||
f.BoolVar(&c.verbose, "verbose", false,
|
f.BoolVar(&c.verbose, "verbose", false,
|
||||||
"Enable verbose (debugging) output.")
|
"Enable verbose (debugging) output.")
|
||||||
|
|
||||||
|
@ -185,6 +188,8 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if we were shutdown but managed to still acquire the lock
|
// Check if we were shutdown but managed to still acquire the lock
|
||||||
|
var childCode int
|
||||||
|
var childErr chan error
|
||||||
select {
|
select {
|
||||||
case <-c.ShutdownCh:
|
case <-c.ShutdownCh:
|
||||||
c.UI.Error("Shutdown triggered during lock acquisition")
|
c.UI.Error("Shutdown triggered during lock acquisition")
|
||||||
|
@ -193,11 +198,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the child process
|
// Start the child process
|
||||||
childDone = make(chan struct{})
|
childErr = make(chan error, 1)
|
||||||
go func() {
|
go func() {
|
||||||
if err := c.startChild(script, childDone, passStdin); err != nil {
|
childErr <- c.startChild(script, passStdin)
|
||||||
c.UI.Error(fmt.Sprintf("%s", err))
|
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Monitor for shutdown, child termination, or lock loss
|
// Monitor for shutdown, child termination, or lock loss
|
||||||
|
@ -210,7 +213,10 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
if c.verbose {
|
if c.verbose {
|
||||||
c.UI.Info("Lock lost, killing child")
|
c.UI.Info("Lock lost, killing child")
|
||||||
}
|
}
|
||||||
case <-childDone:
|
case err := <-childErr:
|
||||||
|
if err != nil {
|
||||||
|
childCode = 2
|
||||||
|
}
|
||||||
if c.verbose {
|
if c.verbose {
|
||||||
c.UI.Info("Child terminated, releasing lock")
|
c.UI.Info("Child terminated, releasing lock")
|
||||||
}
|
}
|
||||||
|
@ -220,8 +226,9 @@ func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||||
// Prevent starting a new child. The lock is never released
|
// Prevent starting a new child. The lock is never released
|
||||||
// after this point.
|
// after this point.
|
||||||
c.childLock.Lock()
|
c.childLock.Lock()
|
||||||
|
|
||||||
// Kill any existing child
|
// Kill any existing child
|
||||||
if err := c.killChild(childDone); err != nil {
|
if err := c.killChild(childErr); err != nil {
|
||||||
c.UI.Error(fmt.Sprintf("%s", err))
|
c.UI.Error(fmt.Sprintf("%s", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,6 +250,13 @@ RELEASE:
|
||||||
} else if c.verbose {
|
} else if c.verbose {
|
||||||
c.UI.Info("Cleanup succeeded")
|
c.UI.Info("Cleanup succeeded")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we detected an error from the child process then we propagate
|
||||||
|
// that.
|
||||||
|
if propagateChildCode {
|
||||||
|
return childCode
|
||||||
|
}
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,8 +335,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name
|
||||||
|
|
||||||
// startChild is a long running routine used to start and
|
// startChild is a long running routine used to start and
|
||||||
// wait for the child process to exit.
|
// wait for the child process to exit.
|
||||||
func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin bool) error {
|
func (c *LockCommand) startChild(script string, passStdin bool) error {
|
||||||
defer close(doneCh)
|
|
||||||
if c.verbose {
|
if c.verbose {
|
||||||
c.UI.Info(fmt.Sprintf("Starting handler '%s'", script))
|
c.UI.Info(fmt.Sprintf("Starting handler '%s'", script))
|
||||||
}
|
}
|
||||||
|
@ -373,7 +386,7 @@ func (c *LockCommand) startChild(script string, doneCh chan struct{}, passStdin
|
||||||
// termination.
|
// termination.
|
||||||
// On Windows, the child is always hard terminated with a SIGKILL, even
|
// On Windows, the child is always hard terminated with a SIGKILL, even
|
||||||
// on the first attempt.
|
// on the first attempt.
|
||||||
func (c *LockCommand) killChild(childDone chan struct{}) error {
|
func (c *LockCommand) killChild(childErr chan error) error {
|
||||||
// Get the child process
|
// Get the child process
|
||||||
child := c.child
|
child := c.child
|
||||||
|
|
||||||
|
@ -395,7 +408,7 @@ func (c *LockCommand) killChild(childDone chan struct{}) error {
|
||||||
|
|
||||||
// Wait for termination, or until a timeout
|
// Wait for termination, or until a timeout
|
||||||
select {
|
select {
|
||||||
case <-childDone:
|
case <-childErr:
|
||||||
if c.verbose {
|
if c.verbose {
|
||||||
c.UI.Info("Child terminated")
|
c.UI.Info("Child terminated")
|
||||||
}
|
}
|
||||||
|
|
|
@ -257,3 +257,33 @@ func TestLockCommand_MonitorRetry_Semaphore_Arg(t *testing.T) {
|
||||||
t.Fatalf("bad: %#v", opts)
|
t.Fatalf("bad: %#v", opts)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLockCommand_ChildExitCode(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
a := agent.NewTestAgent(t.Name(), nil)
|
||||||
|
defer a.Shutdown()
|
||||||
|
|
||||||
|
t.Run("clean exit", func(t *testing.T) {
|
||||||
|
_, c := testLockCommand(t)
|
||||||
|
args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 0"}
|
||||||
|
if got, want := c.Run(args), 0; got != want {
|
||||||
|
t.Fatalf("got %d want %d", got, want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("error exit", func(t *testing.T) {
|
||||||
|
_, c := testLockCommand(t)
|
||||||
|
args := []string{"-http-addr=" + a.HTTPAddr(), "-child-exit-code", "test/prefix", "exit 1"}
|
||||||
|
if got, want := c.Run(args), 2; got != want {
|
||||||
|
t.Fatalf("got %d want %d", got, want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("not propagated", func(t *testing.T) {
|
||||||
|
_, c := testLockCommand(t)
|
||||||
|
args := []string{"-http-addr=" + a.HTTPAddr(), "test/prefix", "exit 1"}
|
||||||
|
if got, want := c.Run(args), 0; got != want {
|
||||||
|
t.Fatalf("got %d want %d", got, want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -52,6 +52,10 @@ Windows has no POSIX compatible notion for `SIGTERM`.
|
||||||
|
|
||||||
#### Command Options
|
#### Command Options
|
||||||
|
|
||||||
|
* `-child-exit-code` - Exit 2 if the child process exited with an error
|
||||||
|
if this is true, otherwise this doesn't propagate an error from the
|
||||||
|
child. The default value is false.
|
||||||
|
|
||||||
* `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error
|
* `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error
|
||||||
while monitoring the lock. This allows riding out brief periods of unavailability
|
while monitoring the lock. This allows riding out brief periods of unavailability
|
||||||
without causing leader elections, but increases the amount of time required
|
without causing leader elections, but increases the amount of time required
|
||||||
|
@ -67,6 +71,9 @@ Windows has no POSIX compatible notion for `SIGTERM`.
|
||||||
|
|
||||||
* `-pass-stdin` - Pass stdin to child process.
|
* `-pass-stdin` - Pass stdin to child process.
|
||||||
|
|
||||||
|
* `timeout` - Maximum amount of time to wait to acquire the lock, specified
|
||||||
|
as a duration like `1s` or `3h`. The default value is 0.
|
||||||
|
|
||||||
* `-try` - Attempt to acquire the lock up to the given timeout. The timeout is a
|
* `-try` - Attempt to acquire the lock up to the given timeout. The timeout is a
|
||||||
positive decimal number, with unit suffix, such as "500ms". Valid time units
|
positive decimal number, with unit suffix, such as "500ms". Valid time units
|
||||||
are "ns", "us" (or "µs"), "ms", "s", "m", "h".
|
are "ns", "us" (or "µs"), "ms", "s", "m", "h".
|
||||||
|
|
Loading…
Reference in New Issue