Merge pull request #4953 from hashicorp/b-script-context-wrapper

consul: add ScriptExecutor context wrapper
2018-12-10 17:22:53 -08:00 · 2018-12-10 17:22:53 -08:00 · 8808ab9cea
parent 4c5f3ae82c 8fa5e90095
commit 8808ab9cea
2 changed files with 95 additions and 24 deletions
--- a/command/agent/consul/script.go
+++ b/command/agent/consul/script.go
@ -18,6 +18,54 @@ type heartbeater interface {
 	UpdateTTL(id, output, status string) error
 }
 // contextExec allows canceling a ScriptExecutor with a context.
 type contextExec struct {
 	// pctx is the parent context. A subcontext will be created with Exec's
 	// timeout.
 	pctx context.Context
 	// exec to be wrapped in a context
 	exec interfaces.ScriptExecutor
 }
 func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {
 	return &contextExec{
 		pctx: ctx,
 		exec: exec,
 	}
 }
 type execResult struct {
 	buf  []byte
 	code int
 	err  error
 }
 // Exec a command until the timeout expires, the context is canceled, or the
 // underlying Exec returns.
 func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {
 	resCh := make(chan execResult, 1)
 	// Don't trust the underlying implementation to obey timeout
 	ctx, cancel := context.WithTimeout(c.pctx, timeout)
 	defer cancel()
 	go func() {
 		output, code, err := c.exec.Exec(timeout, cmd, args)
 		select {
 		case resCh <- execResult{output, code, err}:
 		case <-ctx.Done():
 		}
 	}()
 	select {
 	case res := <-resCh:
 		return res.buf, res.code, res.err
 	case <-ctx.Done():
 		return nil, 0, ctx.Err()
 	}
 }
 // scriptHandle is returned by scriptCheck.run by cancelling a scriptCheck and
 // waiting for it to shutdown.
 type scriptHandle struct {
@ -74,6 +122,11 @@ func newScriptCheck(allocID, taskName, checkID string, check *structs.ServiceChe
 func (s *scriptCheck) run() *scriptHandle {
 	ctx, cancel := context.WithCancel(context.Background())
 	exitCh := make(chan struct{})
 	// Wrap the original ScriptExecutor in one that obeys context
 	// cancelation.
 	ctxExec := newContextExec(ctx, s.exec)
 	go func() {
 		defer close(exitCh)
 		timer := time.NewTimer(0)
@ -93,7 +146,7 @@ func (s *scriptCheck) run() *scriptHandle {
 			metrics.IncrCounter([]string{"client", "consul", "script_runs"}, 1)
 			// Execute check script with timeout
-			output, code, err := s.exec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
+			output, code, err := ctxExec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
 			switch err {
 			case context.Canceled:
 				// check removed during execution; exit
--- a/command/agent/consul/script_test.go
+++ b/command/agent/consul/script_test.go
@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
 	"sync/atomic"
 	"testing"
 	"time"
@ -23,20 +24,34 @@ func TestMain(m *testing.M) {
 // blockingScriptExec implements ScriptExec by running a subcommand that never
 // exits.
 type blockingScriptExec struct {
 	// pctx is canceled *only* for test cleanup. Just like real
 	// ScriptExecutors its Exec method cannot be canceled directly -- only
 	// with a timeout.
 	pctx context.Context
 	// running is ticked before blocking to allow synchronizing operations
 	running chan struct{}
-	// set to true if Exec is called and has exited
+	// set to 1 with atomics if Exec is called and has exited
-	exited bool
+	exited int32
 }
-func newBlockingScriptExec() *blockingScriptExec {
+// newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
-	return &blockingScriptExec{running: make(chan struct{})}
+// caller recvs on the b.running chan. It also returns a CancelFunc for test
 // cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
 // expires.
 func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
 	ctx, cancel := context.WithCancel(context.Background())
 	exec := &blockingScriptExec{
 		pctx:    ctx,
 		running: make(chan struct{}),
 	}
 	return exec, cancel
 }
 func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
 	b.running <- struct{}{}
-	ctx, cancel := context.WithTimeout(context.Background(), dur)
+	ctx, cancel := context.WithTimeout(b.pctx, dur)
 	defer cancel()
 	cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
 	testtask.SetCmdEnv(cmd)
@ -47,23 +62,20 @@ func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]by
 			code = 1
 		}
 	}
-	b.exited = true
+	atomic.StoreInt32(&b.exited, 1)
 	return []byte{}, code, err
 }
 // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
 // any running scripts.
 func TestConsulScript_Exec_Cancel(t *testing.T) {
 	// FIXME: This test is failing now as check process cancellation
 	// doesn't get propogated to the script check causing timeouts
 	t.Skip("FIXME: unexpected failing test")
 	serviceCheck := structs.ServiceCheck{
 		Name:     "sleeper",
 		Interval: time.Hour,
 		Timeout:  time.Hour,
 	}
-	exec := newBlockingScriptExec()
+	exec, cancel := newBlockingScriptExec()
 	defer cancel()
 	// pass nil for heartbeater as it shouldn't be called
 	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
@ -80,8 +92,11 @@ func TestConsulScript_Exec_Cancel(t *testing.T) {
 	case <-time.After(3 * time.Second):
 		t.Fatalf("timed out waiting for script check to exit")
 	}
-	if !exec.exited {
+
-		t.Errorf("expected script executor to run and exit but it has not")
+	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
 	// canceled. Only a wrapper around it obeys the context cancelation.
 	if atomic.LoadInt32(&exec.exited) == 1 {
 		t.Errorf("expected script executor to still be running after timeout")
 	}
 }
@ -106,16 +121,19 @@ func newFakeHeartbeater() *fakeHeartbeater {
 	return &fakeHeartbeater{updates: make(chan execStatus)}
 }
-// TestConsulScript_Exec_Timeout asserts a script will be killed when the
+// TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the
 // timeout is reached.
-func TestConsulScript_Exec_Timeout(t *testing.T) {
+func TestConsulScript_Exec_TimeoutBasic(t *testing.T) {
-	t.Parallel() // run the slow tests in parallel
+	t.Parallel()
 	serviceCheck := structs.ServiceCheck{
 		Name:     "sleeper",
 		Interval: time.Hour,
 		Timeout:  time.Second,
 	}
-	exec := newBlockingScriptExec()
+
 	exec, cancel := newBlockingScriptExec()
 	defer cancel()
 	hb := newFakeHeartbeater()
 	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
@ -132,8 +150,11 @@ func TestConsulScript_Exec_Timeout(t *testing.T) {
 	case <-time.After(3 * time.Second):
 		t.Fatalf("timed out waiting for script check to exit")
 	}
-	if !exec.exited {
+
-		t.Errorf("expected script executor to run and exit but it has not")
+	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
 	// canceled. Only a wrapper around it obeys the context cancelation.
 	if atomic.LoadInt32(&exec.exited) == 1 {
 		t.Errorf("expected script executor to still be running after timeout")
 	}
 	// Cancel and watch for exit
@ -160,11 +181,8 @@ func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
 // the timeout is reached and always set a critical status regardless of what
 // Exec returns.
 func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
-	// FIXME: This test is failing now because we no longer mark critical
+	t.Parallel()
 	// if check succeeded
 	t.Skip("FIXME: unexpected failing test")
 	t.Parallel() // run the slow tests in parallel
 	serviceCheck := structs.ServiceCheck{
 		Name:     "sleeper",
 		Interval: time.Hour,