package consul

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"sync/atomic"
	"testing"
	"time"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/testtask"
	"github.com/hashicorp/nomad/nomad/structs"
)

// TestMain runs the test suite only when testtask.Run reports false;
// otherwise it returns without running any tests (presumably because the
// binary was invoked as the testtask helper process -- confirm against the
// testtask package).
func TestMain(m *testing.M) {
	if testtask.Run() {
		return
	}
	os.Exit(m.Run())
}

// blockingScriptExec is a ScriptExec implementation whose Exec launches a
// subcommand that never exits on its own.
type blockingScriptExec struct {
	// pctx is the parent context of every Exec call. It is canceled *only*
	// for test cleanup: like real ScriptExecutors, Exec cannot be canceled
	// directly -- it can only time out.
	pctx context.Context

	// running receives a value at the start of Exec, before the subcommand
	// is launched, so tests can synchronize with the call.
	running chan struct{}

	// exited is set to 1 (atomically) once Exec has been called and the
	// subcommand has finished.
	exited int32
}

// newBlockingScriptExec builds a ScriptExecutor whose Exec() blocks until the
// caller receives from the running chan. The returned CancelFunc exists for
// test cleanup only; the runtime cannot cancel ScriptExecutors before their
// timeout expires.
func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
	pctx, cancel := context.WithCancel(context.Background())
	return &blockingScriptExec{
		pctx:    pctx,
		running: make(chan struct{}),
	}, cancel
}

// Exec signals on b.running, then runs "sleep 9000h" via the testtask helper
// binary under a context that expires after dur (or when b.pctx is canceled).
// It returns empty output, exit code 1 if the command ended with an
// unsuccessful *exec.ExitError (0 otherwise), and the error from running the
// command.
func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
	// Unbuffered send: blocks until the test receives from b.running.
	b.running <- struct{}{}

	runCtx, stop := context.WithTimeout(b.pctx, dur)
	defer stop()

	sleeper := exec.CommandContext(runCtx, testtask.Path(), "sleep", "9000h")
	testtask.SetCmdEnv(sleeper)
	err := sleeper.Run()

	var code int
	if exitErr, ok := err.(*exec.ExitError); ok && !exitErr.Success() {
		code = 1
	}

	// Record that the subcommand is done so tests can observe it.
	atomic.StoreInt32(&b.exited, 1)
	return []byte{}, code, err
}

// TestConsulScript_Exec_Cancel asserts cancelling a script check
// short-circuits any running scripts.
func TestConsulScript_Exec_Cancel(t *testing.T) { serviceCheck := structs.ServiceCheck{ Name: "sleeper", Interval: time.Hour, Timeout: time.Hour, } exec, cancel := newBlockingScriptExec() defer cancel() // pass nil for heartbeater as it shouldn't be called check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil) handle := check.run() // wait until Exec is called <-exec.running // cancel now that we're blocked in exec handle.cancel() select { case <-handle.wait(): case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exit") } // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be // canceled. Only a wrapper around it obeys the context cancelation. if atomic.LoadInt32(&exec.exited) == 1 { t.Errorf("expected script executor to still be running after timeout") } } type execStatus struct { checkID string output string status string } // fakeHeartbeater implements the heartbeater interface to allow mocking out // Consul in script executor tests. type fakeHeartbeater struct { updates chan execStatus } func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error { f.updates <- execStatus{checkID: checkID, output: output, status: status} return nil } func newFakeHeartbeater() *fakeHeartbeater { return &fakeHeartbeater{updates: make(chan execStatus)} } // TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the // timeout is reached. 
func TestConsulScript_Exec_TimeoutBasic(t *testing.T) { t.Parallel() serviceCheck := structs.ServiceCheck{ Name: "sleeper", Interval: time.Hour, Timeout: time.Second, } exec, cancel := newBlockingScriptExec() defer cancel() hb := newFakeHeartbeater() check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil) handle := check.run() defer handle.cancel() // just-in-case cleanup <-exec.running // Check for UpdateTTL call select { case update := <-hb.updates: if update.status != api.HealthCritical { t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) } case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exit") } // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be // canceled. Only a wrapper around it obeys the context cancelation. if atomic.LoadInt32(&exec.exited) == 1 { t.Errorf("expected script executor to still be running after timeout") } // Cancel and watch for exit handle.cancel() select { case <-handle.wait(): // ok! case update := <-hb.updates: t.Errorf("unexpected UpdateTTL call on exit with status=%q", update) case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exit") } } // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions type sleeperExec struct{} func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) { time.Sleep(100 * time.Millisecond) return []byte{}, 0, nil } // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when // the timeout is reached and always set a critical status regardless of what // Exec returns. 
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) { t.Parallel() serviceCheck := structs.ServiceCheck{ Name: "sleeper", Interval: time.Hour, Timeout: time.Nanosecond, } hb := newFakeHeartbeater() check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.HCLogger(t), nil) handle := check.run() defer handle.cancel() // just-in-case cleanup // Check for UpdateTTL call select { case update := <-hb.updates: if update.status != api.HealthCritical { t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) } if update.output != context.DeadlineExceeded.Error() { t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output) } case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to timeout") } } // simpleExec is a fake ScriptExecutor that returns whatever is specified. type simpleExec struct { code int err error } func (s simpleExec) Exec(time.Duration, string, []string) ([]byte, int, error) { return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err } // newSimpleExec creates a new ScriptExecutor that returns the given code and err. func newSimpleExec(code int, err error) simpleExec { return simpleExec{code: code, err: err} } // TestConsulScript_Exec_Shutdown asserts a script will be executed once more // when told to shutdown. 
func TestConsulScript_Exec_Shutdown(t *testing.T) { serviceCheck := structs.ServiceCheck{ Name: "sleeper", Interval: time.Hour, Timeout: 3 * time.Second, } hb := newFakeHeartbeater() shutdown := make(chan struct{}) exec := newSimpleExec(0, nil) check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown) handle := check.run() defer handle.cancel() // just-in-case cleanup // Tell scriptCheck to exit close(shutdown) select { case update := <-hb.updates: if update.status != api.HealthPassing { t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) } case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exit") } select { case <-handle.wait(): // ok! case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exit") } } func TestConsulScript_Exec_Codes(t *testing.T) { run := func(code int, err error, expected string) func(t *testing.T) { return func(t *testing.T) { t.Parallel() serviceCheck := structs.ServiceCheck{ Name: "test", Interval: time.Hour, Timeout: 3 * time.Second, } hb := newFakeHeartbeater() shutdown := make(chan struct{}) exec := newSimpleExec(code, err) check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown) handle := check.run() defer handle.cancel() select { case update := <-hb.updates: if update.status != expected { t.Errorf("expected %q but received %q", expected, update) } // assert output is being reported expectedOutput := fmt.Sprintf("code=%d err=%v", code, err) if err != nil { expectedOutput = err.Error() } if update.output != expectedOutput { t.Errorf("expected output=%q but found: %q", expectedOutput, update.output) } case <-time.After(3 * time.Second): t.Fatalf("timed out waiting for script check to exec") } } } // Test exit codes with errors t.Run("Passing", run(0, nil, api.HealthPassing)) t.Run("Warning", run(1, nil, api.HealthWarning)) 
t.Run("Critical-2", run(2, nil, api.HealthCritical)) t.Run("Critical-9000", run(9000, nil, api.HealthCritical)) // Errors should always cause Critical status err := fmt.Errorf("test error") t.Run("Error-0", run(0, err, api.HealthCritical)) t.Run("Error-1", run(1, err, api.HealthCritical)) t.Run("Error-2", run(2, err, api.HealthCritical)) t.Run("Error-9000", run(9000, err, api.HealthCritical)) }