10c3bad652
Fixes #2522 Skip embedding client.alloc_dir when building chroot. If a user configures a Nomad client agent so that the chroot_env will embed the client.alloc_dir, Nomad will happily infinitely recurse while building the chroot until something horrible happens. The best case scenario is the filesystem's path length limit is hit. The worst case scenario is disk space is exhausted. A bad agent configuration will look something like this: ```hcl data_dir = "/tmp/nomad-badagent" client { enabled = true chroot_env { # Note that the source matches the data_dir "/tmp/nomad-badagent" = "/ohno" # ... } } ``` Note that `/ohno/client` (the state_dir) will still be created but not `/ohno/alloc` (the alloc_dir). While I cannot think of a good reason why someone would want to embed Nomad's client (and possibly server) directories in chroots, there should be no cause for harm. chroots are only built when Nomad runs as root, and Nomad disables running exec jobs as root by default. Therefore even if client state is copied into chroots, it will be inaccessible to tasks. Skipping the `data_dir` and `{client,server}.state_dir` is possible, but this PR attempts to implement the minimum viable solution to reduce risk of unintended side effects or bugs. When running tests as root in a vm without the fix, the following error occurs: ``` === RUN TestAllocDir_SkipAllocDir alloc_dir_test.go:520: Error Trace: alloc_dir_test.go:520 Error: Received unexpected error: Couldn't create destination file /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/testtask/nomad/test/testtask/.../nomad/test/testtask/secrets/.nomad-mount: open /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/.../testtask/secrets/.nomad-mount: file name too long Test: TestAllocDir_SkipAllocDir --- FAIL: TestAllocDir_SkipAllocDir (22.76s) ``` Also removed unused Copy methods on AllocDir and TaskDir structs. Thanks to @eveld for not letting me forget about this!
274 lines
7.5 KiB
Go
274 lines
7.5 KiB
Go
package allocwatcher
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
hclog "github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/client/allocdir"
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/testutil"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// fakeAllocRunner implements AllocRunnerMeta
|
|
type fakeAllocRunner struct {
|
|
alloc *structs.Allocation
|
|
AllocDir *allocdir.AllocDir
|
|
Broadcaster *cstructs.AllocBroadcaster
|
|
}
|
|
|
|
// newFakeAllocRunner creates a new AllocRunnerMeta. Callers must call
|
|
// AllocDir.Destroy() when finished.
|
|
func newFakeAllocRunner(t *testing.T, logger hclog.Logger) *fakeAllocRunner {
|
|
alloc := mock.Alloc()
|
|
alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
|
|
alloc.Job.TaskGroups[0].EphemeralDisk.Migrate = true
|
|
|
|
path := t.TempDir()
|
|
|
|
return &fakeAllocRunner{
|
|
alloc: alloc,
|
|
AllocDir: allocdir.NewAllocDir(logger, path, alloc.ID),
|
|
Broadcaster: cstructs.NewAllocBroadcaster(logger),
|
|
}
|
|
}
|
|
|
|
func (f *fakeAllocRunner) GetAllocDir() *allocdir.AllocDir {
|
|
return f.AllocDir
|
|
}
|
|
|
|
func (f *fakeAllocRunner) Listener() *cstructs.AllocListener {
|
|
return f.Broadcaster.Listen()
|
|
}
|
|
|
|
func (f *fakeAllocRunner) Alloc() *structs.Allocation {
|
|
return f.alloc
|
|
}
|
|
|
|
// newConfig returns a new Config and cleanup func
|
|
func newConfig(t *testing.T) (Config, func()) {
|
|
logger := testlog.HCLogger(t)
|
|
|
|
prevAR := newFakeAllocRunner(t, logger)
|
|
|
|
alloc := mock.Alloc()
|
|
alloc.PreviousAllocation = prevAR.Alloc().ID
|
|
alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
|
|
alloc.Job.TaskGroups[0].EphemeralDisk.Migrate = true
|
|
alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
|
|
|
|
config := Config{
|
|
Alloc: alloc,
|
|
PreviousRunner: prevAR,
|
|
RPC: nil,
|
|
Config: nil,
|
|
MigrateToken: "fake_token",
|
|
Logger: logger,
|
|
}
|
|
|
|
cleanup := func() {
|
|
prevAR.AllocDir.Destroy()
|
|
}
|
|
|
|
return config, cleanup
|
|
}
|
|
|
|
// TestPrevAlloc_Noop asserts that when no previous allocation is set the noop
|
|
// implementation is returned that does not block or perform migrations.
|
|
func TestPrevAlloc_Noop(t *testing.T) {
|
|
conf, cleanup := newConfig(t)
|
|
defer cleanup()
|
|
|
|
conf.Alloc.PreviousAllocation = ""
|
|
|
|
watcher, migrator := NewAllocWatcher(conf)
|
|
require.NotNil(t, watcher)
|
|
_, ok := migrator.(NoopPrevAlloc)
|
|
require.True(t, ok, "expected migrator to be NoopPrevAlloc")
|
|
|
|
done := make(chan int, 2)
|
|
go func() {
|
|
watcher.Wait(context.Background())
|
|
done <- 1
|
|
migrator.Migrate(context.Background(), nil)
|
|
done <- 1
|
|
}()
|
|
require.False(t, watcher.IsWaiting())
|
|
require.False(t, migrator.IsMigrating())
|
|
<-done
|
|
<-done
|
|
}
|
|
|
|
// TestPrevAlloc_LocalPrevAlloc_Block asserts that when a previous alloc runner
|
|
// is set a localPrevAlloc will block on it.
|
|
func TestPrevAlloc_LocalPrevAlloc_Block(t *testing.T) {
|
|
t.Parallel()
|
|
conf, cleanup := newConfig(t)
|
|
|
|
defer cleanup()
|
|
|
|
conf.Alloc.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
|
|
"run_for": "500ms",
|
|
}
|
|
|
|
_, waiter := NewAllocWatcher(conf)
|
|
|
|
// Wait in a goroutine with a context to make sure it exits at the right time
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
go func() {
|
|
defer cancel()
|
|
waiter.Wait(ctx)
|
|
}()
|
|
|
|
// Assert watcher is waiting
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
return waiter.IsWaiting(), fmt.Errorf("expected watcher to be waiting")
|
|
}, func(err error) {
|
|
t.Fatalf("error: %v", err)
|
|
})
|
|
|
|
// Broadcast a non-terminal alloc update to assert only terminal
|
|
// updates break out of waiting.
|
|
update := conf.PreviousRunner.Alloc().Copy()
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
update.ModifyIndex++
|
|
update.AllocModifyIndex++
|
|
|
|
broadcaster := conf.PreviousRunner.(*fakeAllocRunner).Broadcaster
|
|
err := broadcaster.Send(update)
|
|
require.NoError(t, err)
|
|
|
|
// Assert watcher is still waiting because alloc isn't terminal
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
return waiter.IsWaiting(), fmt.Errorf("expected watcher to be waiting")
|
|
}, func(err error) {
|
|
t.Fatalf("error: %v", err)
|
|
})
|
|
|
|
// Stop the previous alloc and assert watcher stops blocking
|
|
update = update.Copy()
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
update.ClientStatus = structs.AllocClientStatusComplete
|
|
update.ModifyIndex++
|
|
update.AllocModifyIndex++
|
|
|
|
err = broadcaster.Send(update)
|
|
require.NoError(t, err)
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
if waiter.IsWaiting() {
|
|
return false, fmt.Errorf("did not expect watcher to be waiting")
|
|
}
|
|
return !waiter.IsMigrating(), fmt.Errorf("did not expect watcher to be migrating")
|
|
}, func(err error) {
|
|
t.Fatalf("error: %v", err)
|
|
})
|
|
}
|
|
|
|
// TestPrevAlloc_LocalPrevAlloc_Terminated asserts that when a previous alloc
|
|
// runner has already terminated the watcher does not block on the broadcaster.
|
|
func TestPrevAlloc_LocalPrevAlloc_Terminated(t *testing.T) {
|
|
t.Parallel()
|
|
conf, cleanup := newConfig(t)
|
|
defer cleanup()
|
|
|
|
conf.PreviousRunner.Alloc().ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
waiter, _ := NewAllocWatcher(conf)
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
|
defer cancel()
|
|
|
|
// Since prev alloc is terminal, Wait should exit immediately with no
|
|
// context error
|
|
require.NoError(t, waiter.Wait(ctx))
|
|
}
|
|
|
|
// TestPrevAlloc_StreamAllocDir_Error asserts that errors encountered while
|
|
// streaming a tar cause the migration to be cancelled and no files are written
|
|
// (migrations are atomic).
|
|
func TestPrevAlloc_StreamAllocDir_Error(t *testing.T) {
|
|
t.Parallel()
|
|
dest, err := ioutil.TempDir("", "nomadtest-")
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
defer os.RemoveAll(dest)
|
|
|
|
// This test only unit tests streamAllocDir so we only need a partially
|
|
// complete remotePrevAlloc
|
|
prevAlloc := &remotePrevAlloc{
|
|
logger: testlog.HCLogger(t),
|
|
allocID: "123",
|
|
prevAllocID: "abc",
|
|
migrate: true,
|
|
}
|
|
|
|
tarBuf := bytes.NewBuffer(nil)
|
|
tw := tar.NewWriter(tarBuf)
|
|
fooHdr := tar.Header{
|
|
Name: "foo.txt",
|
|
Mode: 0666,
|
|
Size: 1,
|
|
ModTime: time.Now(),
|
|
Typeflag: tar.TypeReg,
|
|
}
|
|
err = tw.WriteHeader(&fooHdr)
|
|
if err != nil {
|
|
t.Fatalf("error writing file header: %v", err)
|
|
}
|
|
if _, err := tw.Write([]byte{'a'}); err != nil {
|
|
t.Fatalf("error writing file: %v", err)
|
|
}
|
|
|
|
// Now write the error file
|
|
contents := []byte("SENTINEL ERROR")
|
|
err = tw.WriteHeader(&tar.Header{
|
|
Name: allocdir.SnapshotErrorFilename(prevAlloc.prevAllocID),
|
|
Mode: 0666,
|
|
Size: int64(len(contents)),
|
|
AccessTime: allocdir.SnapshotErrorTime,
|
|
ChangeTime: allocdir.SnapshotErrorTime,
|
|
ModTime: allocdir.SnapshotErrorTime,
|
|
Typeflag: tar.TypeReg,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("error writing sentinel file header: %v", err)
|
|
}
|
|
if _, err := tw.Write(contents); err != nil {
|
|
t.Fatalf("error writing sentinel file: %v", err)
|
|
}
|
|
|
|
// Assert streamAllocDir fails
|
|
err = prevAlloc.streamAllocDir(context.Background(), ioutil.NopCloser(tarBuf), dest)
|
|
if err == nil {
|
|
t.Fatalf("expected an error from streamAllocDir")
|
|
}
|
|
if !strings.HasSuffix(err.Error(), string(contents)) {
|
|
t.Fatalf("expected error to end with %q but found: %v", string(contents), err)
|
|
}
|
|
|
|
// streamAllocDir leaves cleanup to the caller on error, so assert
|
|
// "foo.txt" was written
|
|
fi, err := os.Stat(filepath.Join(dest, "foo.txt"))
|
|
if err != nil {
|
|
t.Fatalf("error reading foo.txt: %v", err)
|
|
}
|
|
if fi.Size() != fooHdr.Size {
|
|
t.Fatalf("expected foo.txt to be size 1 but found %d", fi.Size())
|
|
}
|
|
}
|