open-nomad/client/allocrunner/taskrunner/script_check_hook_test.go
Seth Hoenig 804f9fdb93
services: ensure task group is set on service hook (#16240)
This PR fixes a bug where the task group information was not being set
on the serviceHook.AllocInfo struct, which is needed later on for calculating
the CheckID of a nomad service check. The CheckID is calculated independently
from multiple callsites, and the information being passed in must be consistent,
including the group name.

The workload.AllocInfo.Group was not set at this callsite, due to the bug fixed in this PR.
 https://github.com/hashicorp/nomad/blob/main/client/serviceregistration/nsd/nsd.go#L114
2023-02-22 10:22:48 -06:00

329 lines
10 KiB
Go

package taskrunner
import (
"context"
"fmt"
"sync/atomic"
"testing"
"time"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
"github.com/hashicorp/nomad/client/serviceregistration"
regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
"github.com/hashicorp/nomad/client/taskenv"
agentconsul "github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
// newScriptMock assembles a scriptCheck for use in tests. The alloc, task and
// service identifiers are fixed placeholder strings; the TTL updater, script
// executor, logger, interval and timeout come from the caller. The returned
// check has its callback installed and lastCheckOk preset to true.
func newScriptMock(hb TTLUpdater, exec interfaces.ScriptExecutor, logger hclog.Logger, interval, timeout time.Duration) *scriptCheck {
	svcCheck := &structs.ServiceCheck{
		Interval: interval,
		Timeout:  timeout,
	}

	// shutdownCh is deliberately left at its nil zero value; tests that need
	// one assign it on the returned scriptCheck.
	cfg := &scriptCheckConfig{
		allocID:    "allocid",
		taskName:   "testtask",
		serviceID:  "serviceid",
		check:      svcCheck,
		ttlUpdater: hb,
		driverExec: exec,
		taskEnv:    &taskenv.TaskEnv{},
		logger:     logger,
	}

	sc := newScriptCheck(cfg)
	sc.callback = newScriptCheckCallback(sc)
	sc.lastCheckOk = true
	return sc
}
// fakeHeartbeater implements the TTLUpdater interface to allow mocking out
// Consul in script executor tests. Every UpdateTTL call is forwarded to the
// heartbeats channel so a test can receive and inspect it.
type fakeHeartbeater struct {
// heartbeats carries one value per UpdateTTL call.
heartbeats chan heartbeat
}
// UpdateTTL publishes the check ID, output and status of the call on the
// heartbeats channel and always returns nil. The namespace argument is not
// recorded.
func (f *fakeHeartbeater) UpdateTTL(checkID, namespace, output, status string) error {
	beat := heartbeat{
		checkID: checkID,
		output:  output,
		status:  status,
	}
	f.heartbeats <- beat
	return nil
}
// newFakeHeartbeater returns a fakeHeartbeater backed by an unbuffered
// channel, so each UpdateTTL call blocks until a test receives the heartbeat.
func newFakeHeartbeater() *fakeHeartbeater {
	beats := make(chan heartbeat)
	return &fakeHeartbeater{heartbeats: beats}
}
// heartbeat captures the check ID, output and status passed to a single
// UpdateTTL call. Field order matters: tests build values with positional
// composite literals.
type heartbeat struct {
	checkID, output, status string
}
// TestScript_Exec_Cancel asserts cancelling a script check shortcircuits
// any running scripts.
func TestScript_Exec_Cancel(t *testing.T) {
	ci.Parallel(t)

	exec, cancel := newBlockingScriptExec()
	defer cancel()

	logger := testlog.HCLogger(t)
	script := newScriptMock(nil, // TTLUpdater should never be called
		exec, logger, time.Hour, time.Hour)

	handle := script.run()
	<-exec.running  // wait until Exec is called
	handle.cancel() // cancel now that we're blocked in exec

	select {
	case <-handle.wait():
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to exit")
	}

	// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
	// canceled. Only a wrapper around it obeys the context cancelation.
	//
	// The comparison value must be an int32: atomic.LoadInt32 returns int32,
	// and testify treats values of different types as unequal, so comparing
	// against an untyped 1 (boxed as int) could never fail.
	// NOTE(review): presumably exec.exited is set to 1 once Exec returns;
	// confirm against the blocking executor's definition.
	require.NotEqual(t, int32(1), atomic.LoadInt32(&exec.exited),
		"expected script executor to still be running after timeout")
}
// TestScript_Exec_TimeoutBasic asserts a script will be killed when the
// timeout is reached.
func TestScript_Exec_TimeoutBasic(t *testing.T) {
	ci.Parallel(t)

	exec, cancel := newBlockingScriptExec()
	defer cancel()

	logger := testlog.HCLogger(t)
	hb := newFakeHeartbeater()
	script := newScriptMock(hb, exec, logger, time.Hour, time.Second)

	handle := script.run()
	defer handle.cancel() // cleanup

	<-exec.running // wait until Exec is called

	// Check for UpdateTTL call
	select {
	case update := <-hb.heartbeats:
		// testify expects (expected, actual); keeping that order makes
		// failure output label the values correctly.
		require.Equal(t, context.DeadlineExceeded.Error(), update.output)
		require.Equal(t, api.HealthCritical, update.status)
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to exit")
	}

	// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
	// canceled. Only a wrapper around it obeys the context cancelation.
	//
	// The comparison value must be an int32: atomic.LoadInt32 returns int32,
	// and testify treats values of different types as unequal, so comparing
	// against an untyped 1 (boxed as int) could never fail.
	require.NotEqual(t, int32(1), atomic.LoadInt32(&exec.exited),
		"expected script executor to still be running after timeout")

	// Cancel and watch for exit
	handle.cancel()
	select {
	case <-handle.wait(): // ok!
	case update := <-hb.heartbeats:
		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to exit")
	}
}
// TestScript_Exec_TimeoutCritical asserts a script will be killed when
// the timeout is reached and always set a critical status regardless of what
// Exec returns.
func TestScript_Exec_TimeoutCritical(t *testing.T) {
	ci.Parallel(t)

	logger := testlog.HCLogger(t)
	hb := newFakeHeartbeater()
	// A one-nanosecond timeout means the check's deadline has effectively
	// passed by the time the executor runs.
	script := newScriptMock(hb, sleeperExec{}, logger, time.Hour, time.Nanosecond)

	handle := script.run()
	defer handle.cancel() // cleanup

	// Check for UpdateTTL call
	select {
	case update := <-hb.heartbeats:
		// testify expects (expected, actual); keeping that order makes
		// failure output label the values correctly.
		require.Equal(t, context.DeadlineExceeded.Error(), update.output)
		require.Equal(t, api.HealthCritical, update.status)
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to timeout")
	}
}
// TestScript_Exec_Shutdown asserts a script will be executed once more
// when told to shutdown.
func TestScript_Exec_Shutdown(t *testing.T) {
	ci.Parallel(t)

	shutdown := make(chan struct{})
	exec := newSimpleExec(0, nil)
	logger := testlog.HCLogger(t)
	hb := newFakeHeartbeater()
	script := newScriptMock(hb, exec, logger, time.Hour, 3*time.Second)
	script.shutdownCh = shutdown

	handle := script.run()
	defer handle.cancel() // cleanup
	close(shutdown)       // tell scriptCheck to exit

	// The check must still report a result after shutdown is signalled.
	select {
	case update := <-hb.heartbeats:
		// testify expects (expected, actual); keeping that order makes
		// failure output label the values correctly.
		require.Equal(t, "code=0 err=<nil>", update.output)
		require.Equal(t, api.HealthPassing, update.status)
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to exit")
	}

	// After the final heartbeat the check loop itself must exit.
	select {
	case <-handle.wait(): // ok!
	case <-time.After(3 * time.Second):
		t.Fatalf("timed out waiting for script check to exit")
	}
}
// TestScript_Exec_Codes asserts script exit codes are translated to their
// corresponding Consul health check status.
func TestScript_Exec_Codes(t *testing.T) {
	ci.Parallel(t)

	// Scripted executor results, consumed one per check run. Each entry lines
	// up with the entry at the same index in expected below.
	exec := newScriptedExec([]execResult{
		{[]byte("output"), 1, nil},
		{[]byte("output"), 0, nil},
		{[]byte("output"), 0, context.DeadlineExceeded},
		{[]byte("output"), 0, nil},
		{[]byte("<ignored output>"), 2, fmt.Errorf("some error")},
		{[]byte("output"), 0, nil},
		{[]byte("error9000"), 9000, nil},
	})
	logger := testlog.HCLogger(t)
	hb := newFakeHeartbeater()
	script := newScriptMock(
		hb, exec, logger, time.Nanosecond, 3*time.Second)

	handle := script.run()
	defer handle.cancel() // cleanup

	// A single deadline bounds the whole sequence, not each heartbeat.
	deadline := time.After(3 * time.Second)

	// When the executor returns an error, the heartbeat output is the error
	// text rather than the script output.
	expected := []heartbeat{
		{script.id, "output", api.HealthWarning},
		{script.id, "output", api.HealthPassing},
		{script.id, context.DeadlineExceeded.Error(), api.HealthCritical},
		{script.id, "output", api.HealthPassing},
		{script.id, "some error", api.HealthCritical},
		{script.id, "output", api.HealthPassing},
		{script.id, "error9000", api.HealthCritical},
	}

	// Range over expected so the number of heartbeats awaited always matches
	// the table instead of a separately maintained loop bound.
	for i, want := range expected {
		select {
		case update := <-hb.heartbeats:
			// testify expects (expected, actual).
			require.Equal(t, want, update,
				"expected update %d to be '%s' but received '%s'",
				i, want, update)
		case <-deadline:
			t.Fatalf("timed out waiting for all script checks to finish")
		}
	}
}
// TestScript_TaskEnvInterpolation verifies that the script check hook
// interpolates task environment variables the same way the service hook
// does: the check ID derived from the service hook's workload must key a
// script check in the hook, both initially and after the environment changes.
func TestScript_TaskEnvInterpolation(t *testing.T) {
	ci.Parallel(t)

	log := testlog.HCLogger(t)
	regHandler := regMock.NewServiceRegistrationHandler(log)
	wrapped := wrapper.NewHandlerWrapper(log, regHandler, nil)

	blockingExec, stop := newBlockingScriptExec()
	defer stop()

	alloc := mock.ConnectAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Services[0].Name = "${NOMAD_JOB_NAME}-${TASK}-${SVC_NAME}"
	task.Services[0].Checks[0].Name = "${NOMAD_JOB_NAME}-${SVC_NAME}-check"
	// The mock was canonicalized before the names above were changed, so
	// canonicalize again.
	alloc.Job.Canonicalize()

	frontendVars := map[string]string{"SVC_NAME": "frontend"}
	taskEnv := taskenv.NewBuilder(mock.Node(), alloc, task, "global").
		SetHookEnv("script_check", frontendVars).
		Build()

	serviceHook := newServiceHook(serviceHookConfig{
		alloc:             alloc,
		task:              task,
		serviceRegWrapper: wrapped,
		logger:            log,
	})
	// Stand in for the prestart hook, which would normally set the env.
	serviceHook.taskEnv = taskEnv

	checkHook := newScriptCheckHook(scriptCheckHookConfig{
		alloc:        alloc,
		task:         task,
		consul:       regHandler,
		logger:       log,
		shutdownWait: time.Hour, // TTLUpdater will never be called
	})
	// Stand in for the prestart hook here as well.
	checkHook.taskEnv = taskEnv
	checkHook.driverExec = blockingExec

	services := serviceHook.getWorkloadServices()
	must.Eq(t, "web", services.AllocInfo.Group)

	wantSvc := services.Services[0]
	svcID := serviceregistration.MakeAllocServiceID(alloc.ID, task.Name, wantSvc)
	wantCheckID := agentconsul.MakeCheckID(svcID, wantSvc.Checks[0])

	scriptChecks := checkHook.newScriptChecks()
	got, found := scriptChecks[wantCheckID]
	must.True(t, found)
	must.Eq(t, "my-job-frontend-check", got.check.Name)

	// Simulate an update: rebuild the environment with a different SVC_NAME
	// and hand it to both hooks.
	backendVars := map[string]string{"SVC_NAME": "backend"}
	taskEnv = taskenv.NewBuilder(mock.Node(), alloc, task, "global").
		SetHookEnv("script_check", backendVars).
		Build()
	checkHook.taskEnv = taskEnv
	serviceHook.taskEnv = taskEnv

	wantSvc = serviceHook.getWorkloadServices().Services[0]
	svcID = serviceregistration.MakeAllocServiceID(alloc.ID, task.Name, wantSvc)
	wantCheckID = agentconsul.MakeCheckID(svcID, wantSvc.Checks[0])

	scriptChecks = checkHook.newScriptChecks()
	got, found = scriptChecks[wantCheckID]
	must.True(t, found)
	must.Eq(t, "my-job-backend-check", got.check.Name)
}
// TestScript_associated checks scriptCheckHook.associated for task "task1"
// across the combinations of the service-level and check-level task names
// being empty, matching, or naming a different task.
func TestScript_associated(t *testing.T) {
	ci.Parallel(t)

	// probe is one call to associated("task1", serviceTask, checkTask) and
	// the result it must produce.
	type probe struct {
		serviceTask string
		checkTask   string
		want        bool
	}

	groups := []struct {
		name   string
		probes []probe
	}{
		{
			name:   "neither set",
			probes: []probe{{"", "", false}},
		},
		{
			name: "service set",
			probes: []probe{
				{"task1", "", true},
				{"task2", "", false},
			},
		},
		{
			name: "check set",
			probes: []probe{
				{"", "task1", true},
				{"", "task2", false},
			},
		},
		{
			// ensure check.task takes precedence over service.task
			name: "both set",
			probes: []probe{
				{"task1", "task1", true},
				{"task1", "task2", false},
				{"task2", "task1", true},
				{"task2", "task2", false},
			},
		},
	}

	for _, group := range groups {
		group := group
		t.Run(group.name, func(t *testing.T) {
			for _, p := range group.probes {
				// A fresh zero-value hook is used for every probe.
				got := new(scriptCheckHook).associated("task1", p.serviceTask, p.checkTask)
				if p.want {
					require.True(t, got)
				} else {
					require.False(t, got)
				}
			}
		})
	}
}