open-nomad/command/agent/consul/script_test.go
Michael Schurter cfcbb9fa21 consul: periodically reconcile services/checks
Periodically sync services and checks from Nomad to Consul. This is
mostly useful when testing with the Consul dev agent which does not
persist state across restarts. However, this is a reasonable safety
measure to prevent skew between Consul's state and Nomad's
services+checks.

Also modernized the test suite a bit.
2018-04-19 15:45:42 -07:00

282 lines
8 KiB
Go

package consul
import (
"context"
"fmt"
"os"
"os/exec"
"testing"
"time"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/helper/testtask"
"github.com/hashicorp/nomad/nomad/structs"
)
func TestMain(m *testing.M) {
if !testtask.Run() {
os.Exit(m.Run())
}
}
// blockingScriptExec implements ScriptExec by running a subcommand that never
// exits.
type blockingScriptExec struct {
// running is ticked before blocking to allow synchronizing operations
running chan struct{}
// set to true if Exec is called and has exited
exited bool
}
func newBlockingScriptExec() *blockingScriptExec {
return &blockingScriptExec{running: make(chan struct{})}
}
func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) {
b.running <- struct{}{}
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
testtask.SetCmdEnv(cmd)
err := cmd.Run()
code := 0
if exitErr, ok := err.(*exec.ExitError); ok {
if !exitErr.Success() {
code = 1
}
}
b.exited = true
return []byte{}, code, err
}
// TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
// any running scripts.
func TestConsulScript_Exec_Cancel(t *testing.T) {
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Hour,
}
exec := newBlockingScriptExec()
// pass nil for heartbeater as it shouldn't be called
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.Logger(t), nil)
handle := check.run()
// wait until Exec is called
<-exec.running
// cancel now that we're blocked in exec
handle.cancel()
select {
case <-handle.wait():
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
}
}
type execStatus struct {
checkID string
output string
status string
}
// fakeHeartbeater implements the heartbeater interface to allow mocking out
// Consul in script executor tests.
type fakeHeartbeater struct {
updates chan execStatus
}
func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error {
f.updates <- execStatus{checkID: checkID, output: output, status: status}
return nil
}
func newFakeHeartbeater() *fakeHeartbeater {
return &fakeHeartbeater{updates: make(chan execStatus)}
}
// TestConsulScript_Exec_Timeout asserts a script will be killed when the
// timeout is reached.
func TestConsulScript_Exec_Timeout(t *testing.T) {
t.Parallel() // run the slow tests in parallel
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Second,
}
exec := newBlockingScriptExec()
hb := newFakeHeartbeater()
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), nil)
handle := check.run()
defer handle.cancel() // just-in-case cleanup
<-exec.running
// Check for UpdateTTL call
select {
case update := <-hb.updates:
if update.status != api.HealthCritical {
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
}
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
}
// Cancel and watch for exit
handle.cancel()
select {
case <-handle.wait():
// ok!
case update := <-hb.updates:
t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
}
// sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions
type sleeperExec struct{}
func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) {
time.Sleep(100 * time.Millisecond)
return []byte{}, 0, nil
}
// TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when
// the timeout is reached and always set a critical status regardless of what
// Exec returns.
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
t.Parallel() // run the slow tests in parallel
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Nanosecond,
}
hb := newFakeHeartbeater()
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.Logger(t), nil)
handle := check.run()
defer handle.cancel() // just-in-case cleanup
// Check for UpdateTTL call
select {
case update := <-hb.updates:
if update.status != api.HealthCritical {
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
}
if update.output != context.DeadlineExceeded.Error() {
t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output)
}
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to timeout")
}
}
// simpleExec is a fake ScriptExecutor that returns whatever is specified.
type simpleExec struct {
code int
err error
}
func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) {
return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
}
// newSimpleExec creates a new ScriptExecutor that returns the given code and err.
func newSimpleExec(code int, err error) simpleExec {
return simpleExec{code: code, err: err}
}
// TestConsulScript_Exec_Shutdown asserts a script will be executed once more
// when told to shutdown.
func TestConsulScript_Exec_Shutdown(t *testing.T) {
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: 3 * time.Second,
}
hb := newFakeHeartbeater()
shutdown := make(chan struct{})
exec := newSimpleExec(0, nil)
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
handle := check.run()
defer handle.cancel() // just-in-case cleanup
// Tell scriptCheck to exit
close(shutdown)
select {
case update := <-hb.updates:
if update.status != api.HealthPassing {
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
}
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
select {
case <-handle.wait():
// ok!
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
}
func TestConsulScript_Exec_Codes(t *testing.T) {
run := func(code int, err error, expected string) func(t *testing.T) {
return func(t *testing.T) {
t.Parallel()
serviceCheck := structs.ServiceCheck{
Name: "test",
Interval: time.Hour,
Timeout: 3 * time.Second,
}
hb := newFakeHeartbeater()
shutdown := make(chan struct{})
exec := newSimpleExec(code, err)
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
handle := check.run()
defer handle.cancel()
select {
case update := <-hb.updates:
if update.status != expected {
t.Errorf("expected %q but received %q", expected, update)
}
// assert output is being reported
expectedOutput := fmt.Sprintf("code=%d err=%v", code, err)
if err != nil {
expectedOutput = err.Error()
}
if update.output != expectedOutput {
t.Errorf("expected output=%q but found: %q", expectedOutput, update.output)
}
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exec")
}
}
}
// Test exit codes with errors
t.Run("Passing", run(0, nil, api.HealthPassing))
t.Run("Warning", run(1, nil, api.HealthWarning))
t.Run("Critical-2", run(2, nil, api.HealthCritical))
t.Run("Critical-9000", run(9000, nil, api.HealthCritical))
// Errors should always cause Critical status
err := fmt.Errorf("test error")
t.Run("Error-0", run(0, err, api.HealthCritical))
t.Run("Error-1", run(1, err, api.HealthCritical))
t.Run("Error-2", run(2, err, api.HealthCritical))
t.Run("Error-9000", run(9000, err, api.HealthCritical))
}