open-nomad/drivers/shared/executor/executor_test.go

package executor
import (
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"
"testing"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
tu "github.com/hashicorp/nomad/testutil"
ps "github.com/mitchellh/go-ps"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
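// executorFactories holds the executor implementations exercised by these
// tests. The universal executor is registered in init below; platform-specific
// test files may register additional factories (for example, a libcontainer
// based executor on Linux).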
var executorFactories = map[string]executorFactory{}
type executorFactory struct {
new func(hclog.Logger) Executor
configureExecCmd func(*testing.T, *ExecCommand)
}
var universalFactory = executorFactory{
new: NewExecutor,
configureExecCmd: func(*testing.T, *ExecCommand) {},
}
func init() {
executorFactories["UniversalExecutor"] = universalFactory
}
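// testExecCmd bundles an ExecCommand with its alloc dir and in-memory
// stdout/stderr buffers so tests can inspect what the command wrote.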
type testExecCmd struct {
command *ExecCommand
allocDir *allocdir.AllocDir
stdout *bytes.Buffer
stderr *bytes.Buffer
outputCopyDone *sync.WaitGroup
}
// testExecutorCommand returns a testExecCmd wrapping an ExecCommand and its AllocDir.
//
// The caller is responsible for calling AllocDir.Destroy() to clean up.
func testExecutorCommand(t *testing.T) *testExecCmd {
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
taskEnv := taskenv.NewBuilder(mock.Node(), alloc, task, "global").Build()
allocDir := allocdir.NewAllocDir(testlog.HCLogger(t), t.TempDir(), alloc.ID)
if err := allocDir.Build(); err != nil {
t.Fatalf("AllocDir.Build() failed: %v", err)
}
if err := allocDir.NewTaskDir(task.Name).Build(false, nil); err != nil {
allocDir.Destroy()
t.Fatalf("allocDir.NewTaskDir(%q) failed: %v", task.Name, err)
}
td := allocDir.TaskDirs[task.Name]
cmd := &ExecCommand{
Env: taskEnv.List(),
TaskDir: td.Dir,
Resources: &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: 500,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 256,
},
},
LinuxResources: &drivers.LinuxResources{
CPUShares: 500,
MemoryLimitBytes: 256 * 1024 * 1024,
},
},
}
if cgutil.UseV2 {
cmd.Resources.LinuxResources.CpusetCgroupPath = filepath.Join(cgutil.CgroupRoot, "testing.scope", cgutil.CgroupScope(alloc.ID, task.Name))
}
testCmd := &testExecCmd{
command: cmd,
allocDir: allocDir,
}
configureTLogging(t, testCmd)
return testCmd
2016-02-04 23:39:29 +00:00
}
// configureTLogging configures a test command with buffers as Std{out|err},
// but wired through os.Pipe so it mimics the non-test case where the command
// is given files as Std{out|err}. The buffers can be used to read the
// command's output.
func configureTLogging(t *testing.T, testcmd *testExecCmd) {
var stdout, stderr bytes.Buffer
var copyDone sync.WaitGroup
stdoutPr, stdoutPw, err := os.Pipe()
require.NoError(t, err)
stderrPr, stderrPw, err := os.Pipe()
require.NoError(t, err)
copyDone.Add(2)
go func() {
defer copyDone.Done()
io.Copy(&stdout, stdoutPr)
}()
go func() {
defer copyDone.Done()
io.Copy(&stderr, stderrPr)
}()
testcmd.stdout = &stdout
testcmd.stderr = &stderr
testcmd.outputCopyDone = &copyDone
testcmd.command.stdout = stdoutPw
testcmd.command.stderr = stderrPw
}
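// TestExecutor_Start_Invalid asserts that launching a nonexistent binary
// returns an error.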
func TestExecutor_Start_Invalid(t *testing.T) {
ci.Parallel(t)
invalid := "/bin/foobar"
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = invalid
execCmd.Args = []string{"1"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
_, err := executor.Launch(execCmd)
require.Error(err)
})
2016-02-04 23:39:29 +00:00
}
}
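// TestExecutor_Start_Wait_Failure_Code asserts that a command exiting non-zero
// is reported with a non-zero exit code.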
func TestExecutor_Start_Wait_Failure_Code(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sh"
execCmd.Args = []string{"-c", "sleep 1; /bin/date fail"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
ps, _ = executor.Wait(context.Background())
require.NotZero(ps.ExitCode, "expected exit code to be non zero")
require.NoError(executor.Shutdown("SIGINT", 100*time.Millisecond))
})
}
}
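// TestExecutor_Start_Wait asserts that a command runs to completion and its
// stdout is captured.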
func TestExecutor_Start_Wait(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/echo"
execCmd.Args = []string{"hello world"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
ps, err = executor.Wait(context.Background())
require.NoError(err)
require.NoError(executor.Shutdown("SIGINT", 100*time.Millisecond))
expected := "hello world"
tu.WaitForResult(func() (bool, error) {
act := strings.TrimSpace(testExecCmd.stdout.String())
if expected != act {
return false, fmt.Errorf("expected: '%s' actual: '%s'", expected, act)
}
return true, nil
}, func(err error) {
require.NoError(err)
})
})
}
}
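// TestExecutor_Start_Wait_Children asserts that Wait returns once the direct
// child exits, even if a detached grandchild process is still running.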
func TestExecutor_Start_Wait_Children(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sh"
execCmd.Args = []string{"-c", "(sleep 30 > /dev/null & ) ; exec sleep 1"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("SIGKILL", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
ch := make(chan error)
go func() {
ps, err = executor.Wait(context.Background())
t.Logf("Processe completed with %#v error: %#v", ps, err)
ch <- err
}()
timeout := 7 * time.Second
select {
case <-ch:
require.NoError(err)
//good
case <-time.After(timeout):
require.Fail(fmt.Sprintf("process is running after timeout: %v", timeout))
}
})
}
}
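// TestExecutor_WaitExitSignal asserts that Stats reports resource usage for a
// running process and that Wait reports the signal that terminated it.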
func TestExecutor_WaitExitSignal(t *testing.T) {
ci.Parallel(t)
testutil.CgroupsCompatibleV1(t) // todo(shoenig) #12351
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sleep"
execCmd.Args = []string{"10000"}
execCmd.ResourceLimits = true
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
pState, err := executor.Launch(execCmd)
require.NoError(t, err)
go func() {
tu.WaitForResult(func() (bool, error) {
ch, err := executor.Stats(context.Background(), time.Second)
if err != nil {
return false, err
}
select {
case <-time.After(time.Second):
return false, fmt.Errorf("stats failed to send on interval")
case ru := <-ch:
assert.NotEmpty(t, ru.Pids, "no pids recorded in stats")
// just checking we measured something; each executor type has its own abilities,
// and e.g. cgroup v2 provides different information than cgroup v1
assert.NotEmpty(t, ru.ResourceUsage.MemoryStats.Measured)
assert.WithinDuration(t, time.Now(), time.Unix(0, ru.Timestamp), time.Second)
}
proc, err := os.FindProcess(pState.Pid)
if err != nil {
return false, err
}
err = proc.Signal(syscall.SIGKILL)
if err != nil {
return false, err
}
return true, nil
}, func(err error) {
assert.NoError(t, executor.Signal(os.Kill))
assert.NoError(t, err)
})
}()
pState, err = executor.Wait(context.Background())
require.NoError(t, err)
require.Equal(t, pState.Signal, int(syscall.SIGKILL))
})
2016-02-04 23:39:29 +00:00
}
}
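// TestExecutor_Start_Kill asserts that Shutdown with SIGINT stops a launched
// process and that it produces no output.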
func TestExecutor_Start_Kill(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sleep"
execCmd.Args = []string{"10"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
require.NoError(executor.Shutdown("SIGINT", 100*time.Millisecond))
time.Sleep(time.Duration(tu.TestMultiplier()*2) * time.Second)
output := testExecCmd.stdout.String()
expected := ""
act := strings.TrimSpace(output)
if act != expected {
t.Fatalf("Command output incorrectly: want %v; got %v", expected, act)
}
})
2016-02-04 23:39:29 +00:00
}
}
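// TestExecutor_Shutdown_Exit asserts that shutting down the executor and
// killing the plugin client reaps the launched process.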
func TestExecutor_Shutdown_Exit(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sleep"
execCmd.Args = []string{"100"}
cfg := &ExecutorConfig{
LogFile: "/dev/null",
}
executor, pluginClient, err := CreateExecutor(testlog.HCLogger(t), nil, cfg)
require.NoError(err)
proc, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(proc.Pid)
executor.Shutdown("", 0)
pluginClient.Kill()
tu.WaitForResult(func() (bool, error) {
p, err := ps.FindProcess(proc.Pid)
if err != nil {
return false, err
}
return p == nil, fmt.Errorf("process found: %d", proc.Pid)
}, func(err error) {
require.NoError(err)
})
require.NoError(allocDir.Destroy())
}
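// TestUniversalExecutor_MakeExecutable asserts that makeExecutable adds
// executable permission bits to a file.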
func TestUniversalExecutor_MakeExecutable(t *testing.T) {
ci.Parallel(t)
// Create a temp file
f, err := ioutil.TempFile("", "")
if err != nil {
t.Fatal(err)
}
defer f.Close()
defer os.Remove(f.Name())
// Set its permissions to be non-executable
f.Chmod(os.FileMode(0610))
err = makeExecutable(f.Name())
if err != nil {
t.Fatalf("makeExecutable() failed: %v", err)
}
// Check the permissions
stat, err := f.Stat()
if err != nil {
t.Fatalf("Stat() failed: %v", err)
}
act := stat.Mode().Perm()
exp := os.FileMode(0755)
if act != exp {
t.Fatalf("expected permissions %v; got %v", exp, act)
}
}
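// TestUniversalExecutor_LookupPath asserts that lookupBin resolves binaries
// via host paths, paths relative to the task dir, the task's local/ directory,
// and the host $PATH.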
func TestUniversalExecutor_LookupPath(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Create a temp dir
tmpDir := t.TempDir()
// Make a foo subdir
os.MkdirAll(filepath.Join(tmpDir, "foo"), 0700)
// Write a file under foo
filePath := filepath.Join(tmpDir, "foo", "tmp.txt")
err := ioutil.WriteFile(filePath, []byte{1, 2}, os.ModeAppend)
require.Nil(err)
// Lookup with full path on host to binary
path, err := lookupBin("not_tmpDir", filePath)
require.Nil(err)
require.Equal(filePath, path)
// Lookup with an absolute path to the binary
_, err = lookupBin(tmpDir, "/foo/tmp.txt")
require.Nil(err)
// Write a file under task dir
filePath3 := filepath.Join(tmpDir, "tmp.txt")
ioutil.WriteFile(filePath3, []byte{1, 2}, os.ModeAppend)
// Lookup with file name, should find the one we wrote above
path, err = lookupBin(tmpDir, "tmp.txt")
require.Nil(err)
require.Equal(filepath.Join(tmpDir, "tmp.txt"), path)
// Write a file under local subdir
os.MkdirAll(filepath.Join(tmpDir, "local"), 0700)
filePath2 := filepath.Join(tmpDir, "local", "tmp.txt")
ioutil.WriteFile(filePath2, []byte{1, 2}, os.ModeAppend)
// Lookup with file name, should find the one we wrote above
path, err = lookupBin(tmpDir, "tmp.txt")
require.Nil(err)
require.Equal(filepath.Join(tmpDir, "local", "tmp.txt"), path)
// Lookup a host path
_, err = lookupBin(tmpDir, "/bin/sh")
require.NoError(err)
// Lookup a host path via $PATH
_, err = lookupBin(tmpDir, "sh")
require.NoError(err)
}
// setupRootfs sets up the rootfs for the libcontainer executor.
// It uses busybox to make some binaries available - somewhat cheaper
// than mounting the underlying host filesystem
func setupRootfs(t *testing.T, rootfs string) {
paths := []string{
"/bin/sh",
"/bin/sleep",
"/bin/echo",
"/bin/date",
}
for _, p := range paths {
setupRootfsBinary(t, rootfs, p)
}
}
// setupRootfsBinary installs a busybox link in the desired path
func setupRootfsBinary(t *testing.T, rootfs, path string) {
t.Helper()
dst := filepath.Join(rootfs, path)
err := os.MkdirAll(filepath.Dir(dst), 0755)
require.NoError(t, err)
src := filepath.Join(
"test-resources", "busybox",
fmt.Sprintf("busybox-%s", runtime.GOARCH),
)
err = os.Link(src, dst)
if err != nil {
// On failure, fallback to copying the file directly.
// Linking may fail if the test source code lives on a separate
// volume/partition from the temp dir used for testing
copyFile(t, src, dst)
}
}
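// copyFile copies src to dst, preserving the source file's mode.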
func copyFile(t *testing.T, src, dst string) {
in, err := os.Open(src)
require.NoErrorf(t, err, "copying %v -> %v", src, dst)
defer in.Close()
ins, err := in.Stat()
require.NoErrorf(t, err, "copying %v -> %v", src, dst)
out, err := os.OpenFile(dst, os.O_RDWR|os.O_CREATE, ins.Mode())
require.NoErrorf(t, err, "copying %v -> %v", src, dst)
defer func() {
if err := out.Close(); err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
}()
_, err = io.Copy(out, in)
require.NoErrorf(t, err, "copying %v -> %v", src, dst)
}
// TestExecutor_Start_Kill_Immediately_NoGrace asserts that executors shutdown
// immediately when sent a kill signal with no grace period.
func TestExecutor_Start_Kill_Immediately_NoGrace(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sleep"
execCmd.Args = []string{"100"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
waitCh := make(chan interface{})
go func() {
defer close(waitCh)
executor.Wait(context.Background())
}()
require.NoError(executor.Shutdown("SIGKILL", 0))
select {
case <-waitCh:
// all good!
case <-time.After(4 * time.Second * time.Duration(tu.TestMultiplier())):
require.Fail("process did not terminate despite SIGKILL")
}
})
}
}
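// TestExecutor_Start_Kill_Immediately_WithGrace asserts that executors shut
// down when sent a kill signal with a short grace period.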
func TestExecutor_Start_Kill_Immediately_WithGrace(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = "/bin/sleep"
execCmd.Args = []string{"100"}
factory.configureExecCmd(t, execCmd)
defer allocDir.Destroy()
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
waitCh := make(chan interface{})
go func() {
defer close(waitCh)
executor.Wait(context.Background())
}()
require.NoError(executor.Shutdown("SIGKILL", 100*time.Millisecond))
select {
case <-waitCh:
// all good!
case <-time.After(4 * time.Second * time.Duration(tu.TestMultiplier())):
require.Fail("process did not terminate despite SIGKILL")
}
})
}
}
// TestExecutor_Start_NonExecutableBinaries asserts that the executor marks the
// binary as executable before starting it.
func TestExecutor_Start_NonExecutableBinaries(t *testing.T) {
ci.Parallel(t)
for name, factory := range executorFactories {
t.Run(name, func(t *testing.T) {
require := require.New(t)
tmpDir := t.TempDir()
nonExecutablePath := filepath.Join(tmpDir, "nonexecutablefile")
ioutil.WriteFile(nonExecutablePath,
[]byte("#!/bin/sh\necho hello world"),
0600)
testExecCmd := testExecutorCommand(t)
execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
execCmd.Cmd = nonExecutablePath
factory.configureExecCmd(t, execCmd)
executor := factory.new(testlog.HCLogger(t))
defer executor.Shutdown("", 0)
// need to configure path in chroot with that file if using isolation executor
if _, ok := executor.(*UniversalExecutor); !ok {
taskName := filepath.Base(testExecCmd.command.TaskDir)
err := allocDir.NewTaskDir(taskName).Build(true, map[string]string{
tmpDir: tmpDir,
})
require.NoError(err)
}
defer allocDir.Destroy()
ps, err := executor.Launch(execCmd)
require.NoError(err)
require.NotZero(ps.Pid)
ps, err = executor.Wait(context.Background())
require.NoError(err)
require.NoError(executor.Shutdown("SIGINT", 100*time.Millisecond))
expected := "hello world"
tu.WaitForResult(func() (bool, error) {
act := strings.TrimSpace(testExecCmd.stdout.String())
if expected != act {
return false, fmt.Errorf("expected: '%s' actual: '%s'", expected, act)
}
return true, nil
}, func(err error) {
stderr := strings.TrimSpace(testExecCmd.stderr.String())
t.Logf("stderr: %v", stderr)
require.NoError(err)
})
})
}
}