open-nomad/client/allocrunner/taskrunner/logmon_hook_unix_test.go
Eng Zer Jun 97d1bc735c
test: use T.TempDir to create temporary test directory (#12853)
* test: use `T.TempDir` to create temporary test directory

This commit replaces `ioutil.TempDir` with `t.TempDir` in tests. The
directory created by `t.TempDir` is automatically removed when the test
and all its subtests complete.

Prior to this commit, temporary directory created using `ioutil.TempDir`
needs to be removed manually by calling `os.RemoveAll`, which is omitted
in some tests. The error handling boilerplate e.g.
	defer func() {
		if err := os.RemoveAll(dir); err != nil {
			t.Fatal(err)
		}
	}
is also tedious, but `t.TempDir` handles this for us nicely.

Reference: https://pkg.go.dev/testing#T.TempDir
Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>

* test: fix TestLogmon_Start_restart on Windows

Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>

* test: fix failing TestConsul_Integration

t.TempDir fails to perform the cleanup properly because the folder is
still in use

testing.go:967: TempDir RemoveAll cleanup: unlinkat /tmp/TestConsul_Integration2837567823/002/191a6f1a-5371-cf7c-da38-220fe85d10e5/web/secrets: device or resource busy

Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
2022-05-12 11:42:40 -04:00

174 lines
4.7 KiB
Go

//go:build !windows
// +build !windows
package taskrunner
import (
"context"
"encoding/json"
"fmt"
"os"
"syscall"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/testutil"
"github.com/shirou/gopsutil/v3/process"
"github.com/stretchr/testify/require"
)
// TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the
// Nomad client is restarting and asserts failing to reattach to logmon causes
// nomad to spawn a new logmon.
func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) {
ci.Parallel(t)
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
dir := t.TempDir()
hookConf := newLogMonHookConfig(task.Name, dir)
runner := &TaskRunner{logmonHookConfig: hookConf}
hook := newLogMonHook(runner, testlog.HCLogger(t))
req := interfaces.TaskPrestartRequest{
Task: task,
}
resp := interfaces.TaskPrestartResponse{}
// First start
require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
defer hook.Stop(context.Background(), nil, nil)
origState := resp.State
origHookData := resp.State[logmonReattachKey]
require.NotEmpty(t, origHookData)
// Pluck PID out of reattach synthesize a crash
reattach := struct {
Pid int
}{}
require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
pid := reattach.Pid
require.NotZero(t, pid)
proc, _ := os.FindProcess(pid)
// Assert logmon is running
require.NoError(t, proc.Signal(syscall.Signal(0)))
// Kill it
require.NoError(t, proc.Signal(os.Kill))
// Since signals are asynchronous wait for the process to die
testutil.WaitForResult(func() (bool, error) {
err := proc.Signal(syscall.Signal(0))
return err != nil, fmt.Errorf("pid %d still running", pid)
}, func(err error) {
require.NoError(t, err)
})
// Running prestart again should return a recoverable error with no
// reattach config to cause the task to be restarted with a new logmon.
req.PreviousState = map[string]string{
logmonReattachKey: origHookData,
}
resp = interfaces.TaskPrestartResponse{}
err := hook.Prestart(context.Background(), &req, &resp)
require.NoError(t, err)
require.NotEqual(t, origState, resp.State)
// Running stop should shutdown logmon
require.NoError(t, hook.Stop(context.Background(), nil, nil))
}
// TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the
// Nomad client is calling Start() and asserts that we recover and spawn a new logmon.
func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) {
ci.Parallel(t)
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
dir := t.TempDir()
hookConf := newLogMonHookConfig(task.Name, dir)
runner := &TaskRunner{logmonHookConfig: hookConf}
hook := newLogMonHook(runner, testlog.HCLogger(t))
req := interfaces.TaskPrestartRequest{
Task: task,
}
resp := interfaces.TaskPrestartResponse{}
// First start
require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
defer hook.Stop(context.Background(), nil, nil)
origState := resp.State
origHookData := resp.State[logmonReattachKey]
require.NotEmpty(t, origHookData)
// Pluck PID out of reattach synthesize a crash
reattach := struct {
Pid int
}{}
require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
pid := reattach.Pid
require.NotZero(t, pid)
proc, err := process.NewProcess(int32(pid))
require.NoError(t, err)
// Assert logmon is running
require.NoError(t, proc.SendSignal(syscall.Signal(0)))
// SIGSTOP would freeze process without it being considered
// exited; so this causes process to be non-exited at beginning of call
// then we kill process while Start call is running
require.NoError(t, proc.SendSignal(syscall.SIGSTOP))
testutil.WaitForResult(func() (bool, error) {
status, err := proc.Status()
if err != nil {
return false, err
}
if len(status) == 0 {
return false, fmt.Errorf("process status did not return value")
}
if status[0] != "stop" {
return false, fmt.Errorf("process is not stopped yet: %v", status)
}
return true, nil
}, func(err error) {
require.NoError(t, err)
})
go func() {
time.Sleep(2 * time.Second)
proc.SendSignal(syscall.SIGCONT)
proc.Kill()
}()
req.PreviousState = map[string]string{
logmonReattachKey: origHookData,
}
initLogmon, initClient := hook.logmon, hook.logmonPluginClient
resp = interfaces.TaskPrestartResponse{}
err = hook.Prestart(context.Background(), &req, &resp)
require.NoError(t, err)
require.NotEqual(t, origState, resp.State)
// assert that we got a new client and logmon
require.True(t, initLogmon != hook.logmon)
require.True(t, initClient != hook.logmonPluginClient)
}