cfcbb9fa21
Periodically sync services and checks from Nomad to Consul. This is mostly useful when testing with the Consul dev agent which does not persist state across restarts. However, this is a reasonable safety measure to prevent skew between Consul's state and Nomad's services+checks. Also modernized the test suite a bit.
282 lines
8 KiB
Go
282 lines
8 KiB
Go
package consul
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/api"
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
|
"github.com/hashicorp/nomad/helper/testtask"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
func TestMain(m *testing.M) {
|
|
if !testtask.Run() {
|
|
os.Exit(m.Run())
|
|
}
|
|
}
|
|
|
|
// blockingScriptExec is a ScriptExec implementation for tests whose Exec
// launches a subcommand that never exits on its own.
type blockingScriptExec struct {
	// running receives a value right before Exec starts blocking, which lets
	// a test synchronize with the point at which Exec has been entered.
	running chan struct{}

	// exited is flipped to true once Exec has been called and has returned.
	exited bool
}
|
|
|
|
func newBlockingScriptExec() *blockingScriptExec {
|
|
return &blockingScriptExec{running: make(chan struct{})}
|
|
}
|
|
|
|
func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) {
|
|
b.running <- struct{}{}
|
|
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
|
|
testtask.SetCmdEnv(cmd)
|
|
err := cmd.Run()
|
|
code := 0
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
if !exitErr.Success() {
|
|
code = 1
|
|
}
|
|
}
|
|
b.exited = true
|
|
return []byte{}, code, err
|
|
}
|
|
|
|
// TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
|
|
// any running scripts.
|
|
func TestConsulScript_Exec_Cancel(t *testing.T) {
|
|
serviceCheck := structs.ServiceCheck{
|
|
Name: "sleeper",
|
|
Interval: time.Hour,
|
|
Timeout: time.Hour,
|
|
}
|
|
exec := newBlockingScriptExec()
|
|
|
|
// pass nil for heartbeater as it shouldn't be called
|
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.Logger(t), nil)
|
|
handle := check.run()
|
|
|
|
// wait until Exec is called
|
|
<-exec.running
|
|
|
|
// cancel now that we're blocked in exec
|
|
handle.cancel()
|
|
|
|
select {
|
|
case <-handle.wait():
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exit")
|
|
}
|
|
if !exec.exited {
|
|
t.Errorf("expected script executor to run and exit but it has not")
|
|
}
|
|
}
|
|
|
|
// execStatus records the arguments of a single UpdateTTL call so tests can
// inspect what a script check reported.
type execStatus struct {
	// checkID, output and status mirror the UpdateTTL parameters of the same
	// names, in the same order.
	checkID, output, status string
}
|
|
|
|
// fakeHeartbeater implements the heartbeater interface to allow mocking out
|
|
// Consul in script executor tests.
|
|
type fakeHeartbeater struct {
|
|
updates chan execStatus
|
|
}
|
|
|
|
func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error {
|
|
f.updates <- execStatus{checkID: checkID, output: output, status: status}
|
|
return nil
|
|
}
|
|
|
|
func newFakeHeartbeater() *fakeHeartbeater {
|
|
return &fakeHeartbeater{updates: make(chan execStatus)}
|
|
}
|
|
|
|
// TestConsulScript_Exec_Timeout asserts a script will be killed when the
|
|
// timeout is reached.
|
|
func TestConsulScript_Exec_Timeout(t *testing.T) {
|
|
t.Parallel() // run the slow tests in parallel
|
|
serviceCheck := structs.ServiceCheck{
|
|
Name: "sleeper",
|
|
Interval: time.Hour,
|
|
Timeout: time.Second,
|
|
}
|
|
exec := newBlockingScriptExec()
|
|
|
|
hb := newFakeHeartbeater()
|
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), nil)
|
|
handle := check.run()
|
|
defer handle.cancel() // just-in-case cleanup
|
|
<-exec.running
|
|
|
|
// Check for UpdateTTL call
|
|
select {
|
|
case update := <-hb.updates:
|
|
if update.status != api.HealthCritical {
|
|
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
|
|
}
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exit")
|
|
}
|
|
if !exec.exited {
|
|
t.Errorf("expected script executor to run and exit but it has not")
|
|
}
|
|
|
|
// Cancel and watch for exit
|
|
handle.cancel()
|
|
select {
|
|
case <-handle.wait():
|
|
// ok!
|
|
case update := <-hb.updates:
|
|
t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exit")
|
|
}
|
|
}
|
|
|
|
// sleeperExec is a stateless script executor for timeout tests: its Exec
// pauses for 100ms and then reports success.
type sleeperExec struct{}
|
|
|
|
func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) {
|
|
time.Sleep(100 * time.Millisecond)
|
|
return []byte{}, 0, nil
|
|
}
|
|
|
|
// TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when
|
|
// the timeout is reached and always set a critical status regardless of what
|
|
// Exec returns.
|
|
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
|
|
t.Parallel() // run the slow tests in parallel
|
|
serviceCheck := structs.ServiceCheck{
|
|
Name: "sleeper",
|
|
Interval: time.Hour,
|
|
Timeout: time.Nanosecond,
|
|
}
|
|
hb := newFakeHeartbeater()
|
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.Logger(t), nil)
|
|
handle := check.run()
|
|
defer handle.cancel() // just-in-case cleanup
|
|
|
|
// Check for UpdateTTL call
|
|
select {
|
|
case update := <-hb.updates:
|
|
if update.status != api.HealthCritical {
|
|
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
|
|
}
|
|
if update.output != context.DeadlineExceeded.Error() {
|
|
t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output)
|
|
}
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to timeout")
|
|
}
|
|
}
|
|
|
|
// simpleExec is a canned ScriptExecutor: its Exec hands back exactly the exit
// code and error it was configured with.
type simpleExec struct {
	code int   // exit code returned by Exec
	err  error // error returned by Exec
}
|
|
|
|
func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) {
|
|
return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
|
|
}
|
|
|
|
// newSimpleExec creates a new ScriptExecutor that returns the given code and err.
|
|
func newSimpleExec(code int, err error) simpleExec {
|
|
return simpleExec{code: code, err: err}
|
|
}
|
|
|
|
// TestConsulScript_Exec_Shutdown asserts a script will be executed once more
|
|
// when told to shutdown.
|
|
func TestConsulScript_Exec_Shutdown(t *testing.T) {
|
|
serviceCheck := structs.ServiceCheck{
|
|
Name: "sleeper",
|
|
Interval: time.Hour,
|
|
Timeout: 3 * time.Second,
|
|
}
|
|
|
|
hb := newFakeHeartbeater()
|
|
shutdown := make(chan struct{})
|
|
exec := newSimpleExec(0, nil)
|
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
|
|
handle := check.run()
|
|
defer handle.cancel() // just-in-case cleanup
|
|
|
|
// Tell scriptCheck to exit
|
|
close(shutdown)
|
|
|
|
select {
|
|
case update := <-hb.updates:
|
|
if update.status != api.HealthPassing {
|
|
t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
|
|
}
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exit")
|
|
}
|
|
|
|
select {
|
|
case <-handle.wait():
|
|
// ok!
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exit")
|
|
}
|
|
}
|
|
|
|
func TestConsulScript_Exec_Codes(t *testing.T) {
|
|
run := func(code int, err error, expected string) func(t *testing.T) {
|
|
return func(t *testing.T) {
|
|
t.Parallel()
|
|
serviceCheck := structs.ServiceCheck{
|
|
Name: "test",
|
|
Interval: time.Hour,
|
|
Timeout: 3 * time.Second,
|
|
}
|
|
|
|
hb := newFakeHeartbeater()
|
|
shutdown := make(chan struct{})
|
|
exec := newSimpleExec(code, err)
|
|
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
|
|
handle := check.run()
|
|
defer handle.cancel()
|
|
|
|
select {
|
|
case update := <-hb.updates:
|
|
if update.status != expected {
|
|
t.Errorf("expected %q but received %q", expected, update)
|
|
}
|
|
// assert output is being reported
|
|
expectedOutput := fmt.Sprintf("code=%d err=%v", code, err)
|
|
if err != nil {
|
|
expectedOutput = err.Error()
|
|
}
|
|
if update.output != expectedOutput {
|
|
t.Errorf("expected output=%q but found: %q", expectedOutput, update.output)
|
|
}
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatalf("timed out waiting for script check to exec")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Test exit codes with errors
|
|
t.Run("Passing", run(0, nil, api.HealthPassing))
|
|
t.Run("Warning", run(1, nil, api.HealthWarning))
|
|
t.Run("Critical-2", run(2, nil, api.HealthCritical))
|
|
t.Run("Critical-9000", run(9000, nil, api.HealthCritical))
|
|
|
|
// Errors should always cause Critical status
|
|
err := fmt.Errorf("test error")
|
|
t.Run("Error-0", run(0, err, api.HealthCritical))
|
|
t.Run("Error-1", run(1, err, api.HealthCritical))
|
|
t.Run("Error-2", run(2, err, api.HealthCritical))
|
|
t.Run("Error-9000", run(9000, err, api.HealthCritical))
|
|
}
|