Merge pull request #4953 from hashicorp/b-script-context-wrapper
consul: add ScriptExecutor context wrapper
This commit is contained in:
commit
8808ab9cea
|
@ -18,6 +18,54 @@ type heartbeater interface {
|
||||||
UpdateTTL(id, output, status string) error
|
UpdateTTL(id, output, status string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// contextExec allows canceling a ScriptExecutor with a context.
|
||||||
|
type contextExec struct {
|
||||||
|
// pctx is the parent context. A subcontext will be created with Exec's
|
||||||
|
// timeout.
|
||||||
|
pctx context.Context
|
||||||
|
|
||||||
|
// exec to be wrapped in a context
|
||||||
|
exec interfaces.ScriptExecutor
|
||||||
|
}
|
||||||
|
|
||||||
|
func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {
|
||||||
|
return &contextExec{
|
||||||
|
pctx: ctx,
|
||||||
|
exec: exec,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type execResult struct {
|
||||||
|
buf []byte
|
||||||
|
code int
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exec a command until the timeout expires, the context is canceled, or the
|
||||||
|
// underlying Exec returns.
|
||||||
|
func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {
|
||||||
|
resCh := make(chan execResult, 1)
|
||||||
|
|
||||||
|
// Don't trust the underlying implementation to obey timeout
|
||||||
|
ctx, cancel := context.WithTimeout(c.pctx, timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
output, code, err := c.exec.Exec(timeout, cmd, args)
|
||||||
|
select {
|
||||||
|
case resCh <- execResult{output, code, err}:
|
||||||
|
case <-ctx.Done():
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case res := <-resCh:
|
||||||
|
return res.buf, res.code, res.err
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, 0, ctx.Err()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// scriptHandle is returned by scriptCheck.run by cancelling a scriptCheck and
|
// scriptHandle is returned by scriptCheck.run by cancelling a scriptCheck and
|
||||||
// waiting for it to shutdown.
|
// waiting for it to shutdown.
|
||||||
type scriptHandle struct {
|
type scriptHandle struct {
|
||||||
|
@ -74,6 +122,11 @@ func newScriptCheck(allocID, taskName, checkID string, check *structs.ServiceChe
|
||||||
func (s *scriptCheck) run() *scriptHandle {
|
func (s *scriptCheck) run() *scriptHandle {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
exitCh := make(chan struct{})
|
exitCh := make(chan struct{})
|
||||||
|
|
||||||
|
// Wrap the original ScriptExecutor in one that obeys context
|
||||||
|
// cancelation.
|
||||||
|
ctxExec := newContextExec(ctx, s.exec)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
defer close(exitCh)
|
defer close(exitCh)
|
||||||
timer := time.NewTimer(0)
|
timer := time.NewTimer(0)
|
||||||
|
@ -93,7 +146,7 @@ func (s *scriptCheck) run() *scriptHandle {
|
||||||
metrics.IncrCounter([]string{"client", "consul", "script_runs"}, 1)
|
metrics.IncrCounter([]string{"client", "consul", "script_runs"}, 1)
|
||||||
|
|
||||||
// Execute check script with timeout
|
// Execute check script with timeout
|
||||||
output, code, err := s.exec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
|
output, code, err := ctxExec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
|
||||||
switch err {
|
switch err {
|
||||||
case context.Canceled:
|
case context.Canceled:
|
||||||
// check removed during execution; exit
|
// check removed during execution; exit
|
||||||
|
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -23,20 +24,34 @@ func TestMain(m *testing.M) {
|
||||||
// blockingScriptExec implements ScriptExec by running a subcommand that never
|
// blockingScriptExec implements ScriptExec by running a subcommand that never
|
||||||
// exits.
|
// exits.
|
||||||
type blockingScriptExec struct {
|
type blockingScriptExec struct {
|
||||||
|
// pctx is canceled *only* for test cleanup. Just like real
|
||||||
|
// ScriptExecutors its Exec method cannot be canceled directly -- only
|
||||||
|
// with a timeout.
|
||||||
|
pctx context.Context
|
||||||
|
|
||||||
// running is ticked before blocking to allow synchronizing operations
|
// running is ticked before blocking to allow synchronizing operations
|
||||||
running chan struct{}
|
running chan struct{}
|
||||||
|
|
||||||
// set to true if Exec is called and has exited
|
// set to 1 with atomics if Exec is called and has exited
|
||||||
exited bool
|
exited int32
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBlockingScriptExec() *blockingScriptExec {
|
// newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
|
||||||
return &blockingScriptExec{running: make(chan struct{})}
|
// caller recvs on the b.running chan. It also returns a CancelFunc for test
|
||||||
|
// cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
|
||||||
|
// expires.
|
||||||
|
func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
exec := &blockingScriptExec{
|
||||||
|
pctx: ctx,
|
||||||
|
running: make(chan struct{}),
|
||||||
|
}
|
||||||
|
return exec, cancel
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
|
func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
|
||||||
b.running <- struct{}{}
|
b.running <- struct{}{}
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), dur)
|
ctx, cancel := context.WithTimeout(b.pctx, dur)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
|
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
|
||||||
testtask.SetCmdEnv(cmd)
|
testtask.SetCmdEnv(cmd)
|
||||||
|
@ -47,23 +62,20 @@ func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]by
|
||||||
code = 1
|
code = 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b.exited = true
|
atomic.StoreInt32(&b.exited, 1)
|
||||||
return []byte{}, code, err
|
return []byte{}, code, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
|
// TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
|
||||||
// any running scripts.
|
// any running scripts.
|
||||||
func TestConsulScript_Exec_Cancel(t *testing.T) {
|
func TestConsulScript_Exec_Cancel(t *testing.T) {
|
||||||
// FIXME: This test is failing now as check process cancellation
|
|
||||||
// doesn't get propogated to the script check causing timeouts
|
|
||||||
t.Skip("FIXME: unexpected failing test")
|
|
||||||
|
|
||||||
serviceCheck := structs.ServiceCheck{
|
serviceCheck := structs.ServiceCheck{
|
||||||
Name: "sleeper",
|
Name: "sleeper",
|
||||||
Interval: time.Hour,
|
Interval: time.Hour,
|
||||||
Timeout: time.Hour,
|
Timeout: time.Hour,
|
||||||
}
|
}
|
||||||
exec := newBlockingScriptExec()
|
exec, cancel := newBlockingScriptExec()
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
// pass nil for heartbeater as it shouldn't be called
|
// pass nil for heartbeater as it shouldn't be called
|
||||||
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
|
||||||
|
@ -80,8 +92,11 @@ func TestConsulScript_Exec_Cancel(t *testing.T) {
|
||||||
case <-time.After(3 * time.Second):
|
case <-time.After(3 * time.Second):
|
||||||
t.Fatalf("timed out waiting for script check to exit")
|
t.Fatalf("timed out waiting for script check to exit")
|
||||||
}
|
}
|
||||||
if !exec.exited {
|
|
||||||
t.Errorf("expected script executor to run and exit but it has not")
|
// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
|
||||||
|
// canceled. Only a wrapper around it obeys the context cancelation.
|
||||||
|
if atomic.LoadInt32(&exec.exited) == 1 {
|
||||||
|
t.Errorf("expected script executor to still be running after timeout")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,16 +121,19 @@ func newFakeHeartbeater() *fakeHeartbeater {
|
||||||
return &fakeHeartbeater{updates: make(chan execStatus)}
|
return &fakeHeartbeater{updates: make(chan execStatus)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestConsulScript_Exec_Timeout asserts a script will be killed when the
|
// TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the
|
||||||
// timeout is reached.
|
// timeout is reached.
|
||||||
func TestConsulScript_Exec_Timeout(t *testing.T) {
|
func TestConsulScript_Exec_TimeoutBasic(t *testing.T) {
|
||||||
t.Parallel() // run the slow tests in parallel
|
t.Parallel()
|
||||||
|
|
||||||
serviceCheck := structs.ServiceCheck{
|
serviceCheck := structs.ServiceCheck{
|
||||||
Name: "sleeper",
|
Name: "sleeper",
|
||||||
Interval: time.Hour,
|
Interval: time.Hour,
|
||||||
Timeout: time.Second,
|
Timeout: time.Second,
|
||||||
}
|
}
|
||||||
exec := newBlockingScriptExec()
|
|
||||||
|
exec, cancel := newBlockingScriptExec()
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
hb := newFakeHeartbeater()
|
hb := newFakeHeartbeater()
|
||||||
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
|
||||||
|
@ -132,8 +150,11 @@ func TestConsulScript_Exec_Timeout(t *testing.T) {
|
||||||
case <-time.After(3 * time.Second):
|
case <-time.After(3 * time.Second):
|
||||||
t.Fatalf("timed out waiting for script check to exit")
|
t.Fatalf("timed out waiting for script check to exit")
|
||||||
}
|
}
|
||||||
if !exec.exited {
|
|
||||||
t.Errorf("expected script executor to run and exit but it has not")
|
// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
|
||||||
|
// canceled. Only a wrapper around it obeys the context cancelation.
|
||||||
|
if atomic.LoadInt32(&exec.exited) == 1 {
|
||||||
|
t.Errorf("expected script executor to still be running after timeout")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cancel and watch for exit
|
// Cancel and watch for exit
|
||||||
|
@ -160,11 +181,8 @@ func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
|
||||||
// the timeout is reached and always set a critical status regardless of what
|
// the timeout is reached and always set a critical status regardless of what
|
||||||
// Exec returns.
|
// Exec returns.
|
||||||
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
|
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
|
||||||
// FIXME: This test is failing now because we no longer mark critical
|
t.Parallel()
|
||||||
// if check succeeded
|
|
||||||
t.Skip("FIXME: unexpected failing test")
|
|
||||||
|
|
||||||
t.Parallel() // run the slow tests in parallel
|
|
||||||
serviceCheck := structs.ServiceCheck{
|
serviceCheck := structs.ServiceCheck{
|
||||||
Name: "sleeper",
|
Name: "sleeper",
|
||||||
Interval: time.Hour,
|
Interval: time.Hour,
|
||||||
|
|
Loading…
Reference in a new issue