open-nomad/client/allocwatcher/alloc_watcher_test.go
Michael Schurter 10c3bad652 client: never embed alloc_dir in chroot
Fixes #2522

Skip embedding client.alloc_dir when building chroot. If a user
configures a Nomad client agent so that the chroot_env will embed the
client.alloc_dir, Nomad will happily infinitely recurse while building
the chroot until something horrible happens. The best case scenario is
the filesystem's path length limit is hit. The worst case scenario is
disk space is exhausted.

A bad agent configuration will look something like this:

```hcl
data_dir = "/tmp/nomad-badagent"

client {
  enabled = true

  chroot_env {
    # Note that the source matches the data_dir
    "/tmp/nomad-badagent" = "/ohno"
    # ...
  }
}
```

Note that `/ohno/client` (the state_dir) will still be created but not
`/ohno/alloc` (the alloc_dir).
While I cannot think of a good reason why someone would want to embed
Nomad's client (and possibly server) directories in chroots, there
should be no cause for harm. chroots are only built when Nomad runs as
root, and Nomad disables running exec jobs as root by default. Therefore
even if client state is copied into chroots, it will be inaccessible to
tasks.

Skipping the `data_dir` and `{client,server}.state_dir` is possible, but
this PR attempts to implement the minimum viable solution to reduce risk
of unintended side effects or bugs.

When running tests as root in a vm without the fix, the following error
occurs:

```
=== RUN   TestAllocDir_SkipAllocDir
    alloc_dir_test.go:520:
                Error Trace:    alloc_dir_test.go:520
                Error:          Received unexpected error:
                                Couldn't create destination file /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/testtask/nomad/test/testtask/.../nomad/test/testtask/secrets/.nomad-mount: open /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/.../testtask/secrets/.nomad-mount: file name too long
                Test:           TestAllocDir_SkipAllocDir
--- FAIL: TestAllocDir_SkipAllocDir (22.76s)
```

Also removed unused Copy methods on AllocDir and TaskDir structs.

Thanks to @eveld for not letting me forget about this!
2021-10-18 09:22:01 -07:00

274 lines
7.5 KiB
Go

package allocwatcher
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"time"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocdir"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)
// fakeAllocRunner implements AllocRunnerMeta
type fakeAllocRunner struct {
alloc *structs.Allocation
AllocDir *allocdir.AllocDir
Broadcaster *cstructs.AllocBroadcaster
}
// newFakeAllocRunner creates a new AllocRunnerMeta. Callers must call
// AllocDir.Destroy() when finished.
func newFakeAllocRunner(t *testing.T, logger hclog.Logger) *fakeAllocRunner {
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
alloc.Job.TaskGroups[0].EphemeralDisk.Migrate = true
path := t.TempDir()
return &fakeAllocRunner{
alloc: alloc,
AllocDir: allocdir.NewAllocDir(logger, path, alloc.ID),
Broadcaster: cstructs.NewAllocBroadcaster(logger),
}
}
func (f *fakeAllocRunner) GetAllocDir() *allocdir.AllocDir {
return f.AllocDir
}
func (f *fakeAllocRunner) Listener() *cstructs.AllocListener {
return f.Broadcaster.Listen()
}
func (f *fakeAllocRunner) Alloc() *structs.Allocation {
return f.alloc
}
// newConfig returns a new Config and cleanup func
func newConfig(t *testing.T) (Config, func()) {
logger := testlog.HCLogger(t)
prevAR := newFakeAllocRunner(t, logger)
alloc := mock.Alloc()
alloc.PreviousAllocation = prevAR.Alloc().ID
alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
alloc.Job.TaskGroups[0].EphemeralDisk.Migrate = true
alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
config := Config{
Alloc: alloc,
PreviousRunner: prevAR,
RPC: nil,
Config: nil,
MigrateToken: "fake_token",
Logger: logger,
}
cleanup := func() {
prevAR.AllocDir.Destroy()
}
return config, cleanup
}
// TestPrevAlloc_Noop asserts that when no previous allocation is set the noop
// implementation is returned that does not block or perform migrations.
func TestPrevAlloc_Noop(t *testing.T) {
conf, cleanup := newConfig(t)
defer cleanup()
conf.Alloc.PreviousAllocation = ""
watcher, migrator := NewAllocWatcher(conf)
require.NotNil(t, watcher)
_, ok := migrator.(NoopPrevAlloc)
require.True(t, ok, "expected migrator to be NoopPrevAlloc")
done := make(chan int, 2)
go func() {
watcher.Wait(context.Background())
done <- 1
migrator.Migrate(context.Background(), nil)
done <- 1
}()
require.False(t, watcher.IsWaiting())
require.False(t, migrator.IsMigrating())
<-done
<-done
}
// TestPrevAlloc_LocalPrevAlloc_Block asserts that when a previous alloc runner
// is set a localPrevAlloc will block on it.
func TestPrevAlloc_LocalPrevAlloc_Block(t *testing.T) {
t.Parallel()
conf, cleanup := newConfig(t)
defer cleanup()
conf.Alloc.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
"run_for": "500ms",
}
_, waiter := NewAllocWatcher(conf)
// Wait in a goroutine with a context to make sure it exits at the right time
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer cancel()
waiter.Wait(ctx)
}()
// Assert watcher is waiting
testutil.WaitForResult(func() (bool, error) {
return waiter.IsWaiting(), fmt.Errorf("expected watcher to be waiting")
}, func(err error) {
t.Fatalf("error: %v", err)
})
// Broadcast a non-terminal alloc update to assert only terminal
// updates break out of waiting.
update := conf.PreviousRunner.Alloc().Copy()
update.DesiredStatus = structs.AllocDesiredStatusStop
update.ModifyIndex++
update.AllocModifyIndex++
broadcaster := conf.PreviousRunner.(*fakeAllocRunner).Broadcaster
err := broadcaster.Send(update)
require.NoError(t, err)
// Assert watcher is still waiting because alloc isn't terminal
testutil.WaitForResult(func() (bool, error) {
return waiter.IsWaiting(), fmt.Errorf("expected watcher to be waiting")
}, func(err error) {
t.Fatalf("error: %v", err)
})
// Stop the previous alloc and assert watcher stops blocking
update = update.Copy()
update.DesiredStatus = structs.AllocDesiredStatusStop
update.ClientStatus = structs.AllocClientStatusComplete
update.ModifyIndex++
update.AllocModifyIndex++
err = broadcaster.Send(update)
require.NoError(t, err)
testutil.WaitForResult(func() (bool, error) {
if waiter.IsWaiting() {
return false, fmt.Errorf("did not expect watcher to be waiting")
}
return !waiter.IsMigrating(), fmt.Errorf("did not expect watcher to be migrating")
}, func(err error) {
t.Fatalf("error: %v", err)
})
}
// TestPrevAlloc_LocalPrevAlloc_Terminated asserts that when a previous alloc
// runner has already terminated the watcher does not block on the broadcaster.
func TestPrevAlloc_LocalPrevAlloc_Terminated(t *testing.T) {
t.Parallel()
conf, cleanup := newConfig(t)
defer cleanup()
conf.PreviousRunner.Alloc().ClientStatus = structs.AllocClientStatusComplete
waiter, _ := NewAllocWatcher(conf)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
// Since prev alloc is terminal, Wait should exit immediately with no
// context error
require.NoError(t, waiter.Wait(ctx))
}
// TestPrevAlloc_StreamAllocDir_Error asserts that errors encountered while
// streaming a tar cause the migration to be cancelled and no files are written
// (migrations are atomic).
func TestPrevAlloc_StreamAllocDir_Error(t *testing.T) {
t.Parallel()
dest, err := ioutil.TempDir("", "nomadtest-")
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dest)
// This test only unit tests streamAllocDir so we only need a partially
// complete remotePrevAlloc
prevAlloc := &remotePrevAlloc{
logger: testlog.HCLogger(t),
allocID: "123",
prevAllocID: "abc",
migrate: true,
}
tarBuf := bytes.NewBuffer(nil)
tw := tar.NewWriter(tarBuf)
fooHdr := tar.Header{
Name: "foo.txt",
Mode: 0666,
Size: 1,
ModTime: time.Now(),
Typeflag: tar.TypeReg,
}
err = tw.WriteHeader(&fooHdr)
if err != nil {
t.Fatalf("error writing file header: %v", err)
}
if _, err := tw.Write([]byte{'a'}); err != nil {
t.Fatalf("error writing file: %v", err)
}
// Now write the error file
contents := []byte("SENTINEL ERROR")
err = tw.WriteHeader(&tar.Header{
Name: allocdir.SnapshotErrorFilename(prevAlloc.prevAllocID),
Mode: 0666,
Size: int64(len(contents)),
AccessTime: allocdir.SnapshotErrorTime,
ChangeTime: allocdir.SnapshotErrorTime,
ModTime: allocdir.SnapshotErrorTime,
Typeflag: tar.TypeReg,
})
if err != nil {
t.Fatalf("error writing sentinel file header: %v", err)
}
if _, err := tw.Write(contents); err != nil {
t.Fatalf("error writing sentinel file: %v", err)
}
// Assert streamAllocDir fails
err = prevAlloc.streamAllocDir(context.Background(), ioutil.NopCloser(tarBuf), dest)
if err == nil {
t.Fatalf("expected an error from streamAllocDir")
}
if !strings.HasSuffix(err.Error(), string(contents)) {
t.Fatalf("expected error to end with %q but found: %v", string(contents), err)
}
// streamAllocDir leaves cleanup to the caller on error, so assert
// "foo.txt" was written
fi, err := os.Stat(filepath.Join(dest, "foo.txt"))
if err != nil {
t.Fatalf("error reading foo.txt: %v", err)
}
if fi.Size() != fooHdr.Size {
t.Fatalf("expected foo.txt to be size 1 but found %d", fi.Size())
}
}