open-nomad/client/allocrunner/consul_http_sock_hook_test.go
Michael Schurter 10c3bad652 client: never embed alloc_dir in chroot
Fixes #2522

Skip embedding client.alloc_dir when building chroot. If a user
configures a Nomad client agent so that the chroot_env will embed the
client.alloc_dir, Nomad will happily infinitely recurse while building
the chroot until something horrible happens. The best case scenario is
the filesystem's path length limit is hit. The worst case scenario is
disk space is exhausted.

A bad agent configuration will look something like this:

```hcl
data_dir = "/tmp/nomad-badagent"

client {
  enabled = true

  chroot_env {
    # Note that the source matches the data_dir
    "/tmp/nomad-badagent" = "/ohno"
    # ...
  }
}
```

Note that `/ohno/client` (the state_dir) will still be created but not
`/ohno/alloc` (the alloc_dir).
While I cannot think of a good reason why someone would want to embed
Nomad's client (and possibly server) directories in chroots, there
should be no cause for harm. chroots are only built when Nomad runs as
root, and Nomad disables running exec jobs as root by default. Therefore
even if client state is copied into chroots, it will be inaccessible to
tasks.

Skipping the `data_dir` and `{client,server}.state_dir` is possible, but
this PR attempts to implement the minimum viable solution to reduce risk
of unintended side effects or bugs.

When running tests as root in a vm without the fix, the following error
occurs:

```
=== RUN   TestAllocDir_SkipAllocDir
    alloc_dir_test.go:520:
                Error Trace:    alloc_dir_test.go:520
                Error:          Received unexpected error:
                                Couldn't create destination file /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/testtask/nomad/test/testtask/.../nomad/test/testtask/secrets/.nomad-mount: open /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/.../testtask/secrets/.nomad-mount: file name too long
                Test:           TestAllocDir_SkipAllocDir
--- FAIL: TestAllocDir_SkipAllocDir (22.76s)
```

Also removed unused Copy methods on AllocDir and TaskDir structs.

Thanks to @eveld for not letting me forget about this!
2021-10-18 09:22:01 -07:00

123 lines
3.1 KiB
Go

package allocrunner
import (
"bytes"
"net"
"path/filepath"
"testing"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/stretchr/testify/require"
)
func TestConsulSocketHook_PrerunPostrun_Ok(t *testing.T) {
t.Parallel()
fakeConsul, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
defer fakeConsul.Close()
consulConfig := &config.ConsulConfig{
Addr: fakeConsul.Addr().String(),
}
alloc := mock.ConnectNativeAlloc("bridge")
logger := testlog.HCLogger(t)
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask", alloc.ID)
defer cleanupDir()
// start unix socket proxy
h := newConsulHTTPSocketHook(logger, alloc, allocDir, consulConfig)
require.NoError(t, h.Prerun())
httpSocket := filepath.Join(allocDir.AllocDir, allocdir.AllocHTTPSocket)
taskCon, err := net.Dial("unix", httpSocket)
require.NoError(t, err)
// write to consul from task to ensure data is proxied out of the netns
input := bytes.Repeat([]byte{'X'}, 5*1024)
errCh := make(chan error, 1)
go func() {
_, err := taskCon.Write(input)
errCh <- err
}()
// accept the connection from inside the netns
consulConn, err := fakeConsul.Accept()
require.NoError(t, err)
defer consulConn.Close()
output := make([]byte, len(input))
_, err = consulConn.Read(output)
require.NoError(t, err)
require.NoError(t, <-errCh)
require.Equal(t, input, output)
// read from consul to ensure http response bodies can come back
input = bytes.Repeat([]byte{'Y'}, 5*1024)
go func() {
_, err := consulConn.Write(input)
errCh <- err
}()
output = make([]byte, len(input))
_, err = taskCon.Read(output)
require.NoError(t, err)
require.NoError(t, <-errCh)
require.Equal(t, input, output)
// stop the unix socket proxy
require.NoError(t, h.Postrun())
// consul reads should now error
n, err := consulConn.Read(output)
require.Error(t, err)
require.Zero(t, n)
// task reads and writes should error
n, err = taskCon.Write(input)
require.Error(t, err)
require.Zero(t, n)
n, err = taskCon.Read(output)
require.Error(t, err)
require.Zero(t, n)
}
func TestConsulHTTPSocketHook_Prerun_Error(t *testing.T) {
t.Parallel()
logger := testlog.HCLogger(t)
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask", alloc.ID)
defer cleanupDir()
consulConfig := new(config.ConsulConfig)
alloc := mock.Alloc()
connectNativeAlloc := mock.ConnectNativeAlloc("bridge")
{
// an alloc without a connect native task should not return an error
h := newConsulHTTPSocketHook(logger, alloc, allocDir, consulConfig)
require.NoError(t, h.Prerun())
// postrun should be a noop
require.NoError(t, h.Postrun())
}
{
// an alloc with a native task should return an error when consul is not
// configured
h := newConsulHTTPSocketHook(logger, connectNativeAlloc, allocDir, consulConfig)
require.EqualError(t, h.Prerun(), "consul address must be set on nomad client")
// Postrun should be a noop
require.NoError(t, h.Postrun())
}
}