e2e: test for host volumes and Docker volumes (#8972)

Exercises host volume and Docker volume functionality for the `exec` and `docker`
task drivers, particularly around mounting locations within the container and
how these mounts can be used with `template`.
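
Both job specs in this diff request a host volume with `source = "shared_data"`, so the e2e
cluster's client agents need a matching `host_volume` stanza in their configuration. A minimal
sketch of that client config follows; the path shown is illustrative, not necessarily what the
e2e cluster actually uses:

client {
  enabled = true

  # exposes this host directory to jobs as the "shared_data" host volume
  host_volume "shared_data" {
    path      = "/srv/shared_data"  # illustrative path
    read_only = false
  }
}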
Tim Gross 2020-09-28 11:14:13 -04:00 committed by GitHub
parent 566dae7b19
commit 1311f32f1b
6 changed files with 239 additions and 167 deletions


@@ -16,7 +16,6 @@ import (
	_ "github.com/hashicorp/nomad/e2e/csi"
	_ "github.com/hashicorp/nomad/e2e/deployment"
	_ "github.com/hashicorp/nomad/e2e/example"
-	_ "github.com/hashicorp/nomad/e2e/hostvolumes"
	_ "github.com/hashicorp/nomad/e2e/lifecycle"
	_ "github.com/hashicorp/nomad/e2e/metrics"
	_ "github.com/hashicorp/nomad/e2e/nodedrain"
@@ -27,6 +26,7 @@ import (
	_ "github.com/hashicorp/nomad/e2e/spread"
	_ "github.com/hashicorp/nomad/e2e/systemsched"
	_ "github.com/hashicorp/nomad/e2e/taskevents"
+	_ "github.com/hashicorp/nomad/e2e/volumes"
)

func TestE2E(t *testing.T) {


@@ -144,3 +144,21 @@ func AllocStatusesRescheduled(jobID string) ([]string, error) {
	}
	return statuses, nil
}

// AllocExec is a convenience wrapper that runs 'nomad alloc exec' with the
// passed cmd via '/bin/sh -c', retrying if the task isn't ready
func AllocExec(allocID, taskID string, cmd string, wc *WaitConfig) (string, error) {
	var got string
	var err error
	interval, retries := wc.OrDefault()

	args := []string{"alloc", "exec", "-task", taskID, allocID, "/bin/sh", "-c", cmd}
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(interval)
		got, err = Command("nomad", args...)
		return err == nil, err
	}, func(e error) {
		err = fmt.Errorf("exec failed: 'nomad %s'", strings.Join(args, " "))
	})
	return got, err
}


@@ -1,132 +0,0 @@
package hostvolumes

import (
	"time"

	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/require"
)

type BasicHostVolumeTest struct {
	framework.TC
	jobIds []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "Host Volumes",
		CanRunLocal: true,
		Cases: []framework.TestCase{
			new(BasicHostVolumeTest),
		},
	})
}

func (tc *BasicHostVolumeTest) BeforeAll(f *framework.F) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	// Ensure that we have at least 1 client node in ready state
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

func (tc *BasicHostVolumeTest) TestSingleHostVolume(f *framework.F) {
	require := require.New(f.T())

	nomadClient := tc.Nomad()
	uuid := uuid.Generate()
	jobID := "hostvol" + uuid[0:8]
	tc.jobIds = append(tc.jobIds, jobID)

	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, "hostvolumes/input/single_mount.nomad", jobID, "")

	waitForTaskState := func(desiredState string) {
		require.Eventually(func() bool {
			allocs, _, _ := nomadClient.Jobs().Allocations(jobID, false, nil)
			if len(allocs) != 1 {
				return false
			}
			first := allocs[0]
			taskState := first.TaskStates["test"]
			if taskState == nil {
				return false
			}
			return taskState.State == desiredState
		}, 30*time.Second, 1*time.Second)
	}

	waitForClientAllocStatus := func(desiredStatus string) {
		require.Eventually(func() bool {
			allocSummaries, _, _ := nomadClient.Jobs().Allocations(jobID, false, nil)
			if len(allocSummaries) != 1 {
				return false
			}
			alloc, _, _ := nomadClient.Allocations().Info(allocSummaries[0].ID, nil)
			if alloc == nil {
				return false
			}
			return alloc.ClientStatus == desiredStatus
		}, 30*time.Second, 1*time.Second)
	}

	waitForRestartCount := func(desiredCount uint64) {
		require.Eventually(func() bool {
			allocs, _, _ := nomadClient.Jobs().Allocations(jobID, false, nil)
			if len(allocs) != 1 {
				return false
			}
			first := allocs[0]
			return first.TaskStates["test"].Restarts == desiredCount
		}, 30*time.Second, 1*time.Second)
	}

	// Verify scheduling
	for _, allocStub := range allocs {
		node, _, err := nomadClient.Nodes().Info(allocStub.NodeID, nil)
		require.Nil(err)

		_, ok := node.HostVolumes["shared_data"]
		require.True(ok, "Node does not have the requested volume")
	}

	// Wrap in retry to wait until running
	waitForTaskState(structs.TaskStateRunning)

	// Client should be running
	waitForClientAllocStatus(structs.AllocClientStatusRunning)

	// Should not be restarted
	waitForRestartCount(0)

	// Ensure allocs can be restarted
	for _, allocStub := range allocs {
		alloc, _, err := nomadClient.Allocations().Info(allocStub.ID, nil)
		require.Nil(err)

		err = nomadClient.Allocations().Restart(alloc, "", nil)
		require.Nil(err)
	}

	// Should be restarted once
	waitForRestartCount(1)

	// Wrap in retry to wait until running again
	waitForTaskState(structs.TaskStateRunning)

	// Client should be running again
	waitForClientAllocStatus(structs.AllocClientStatusRunning)
}

func (tc *BasicHostVolumeTest) AfterEach(f *framework.F) {
	nomadClient := tc.Nomad()
	jobs := nomadClient.Jobs()

	// Stop all jobs in test
	for _, id := range tc.jobIds {
		jobs.Deregister(id, true, nil)
	}

	// Garbage collect
	nomadClient.System().GarbageCollect()
}

e2e/hostvolumes/input/single_mount.nomad (deleted)

@@ -1,34 +0,0 @@
job "test1" {
  datacenters = ["dc1", "dc2"]
  type        = "service"

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "test1" {
    count = 1

    volume "data" {
      type   = "host"
      source = "shared_data"
    }

    task "test" {
      driver = "docker"

      volume_mount {
        volume      = "data"
        destination = "/tmp/foo"
      }

      config {
        image   = "bash:latest"
        command = "bash"
        args    = ["-c", "sleep 15000"]
      }
    }
  }
}

e2e/volumes/input/volumes.nomad (new file)

@@ -0,0 +1,104 @@
job "volumes" {
  datacenters = ["dc1", "dc2"]

  group "group" {

    volume "data" {
      type   = "host"
      source = "shared_data"
    }

    task "docker_task" {
      driver = "docker"

      config {
        image   = "busybox:1"
        command = "/bin/sh"
        args    = ["/usr/local/bin/myapplication.sh"]

        mounts = [
          # this mount binds the task's own NOMAD_TASK_DIR directory as the
          # source, letting us map it to a more convenient location; this is a
          # frequently-used way to get templates into an arbitrary location in
          # the task for Docker
          {
            type     = "bind"
            source   = "local"
            target   = "/usr/local/bin"
            readonly = true
          }
        ]
      }

      # this is the host volume mount, which we'll write into in our task to
      # ensure we have persistent data
      volume_mount {
        volume      = "data"
        destination = "/tmp/foo"
      }

      template {
        data = <<EOT
#!/bin/sh
echo ${NOMAD_ALLOC_ID} > /tmp/foo/${NOMAD_ALLOC_ID}
sleep 3600
EOT

        # this path is relative to the allocation's task directory:
        # /var/nomad/alloc/:alloc_id/:task_name
        # but Docker tasks can't see this folder except for the bind-mounted
        # directories inside it (./local ./secrets ./tmp)
        # so the only reason this works to write our script to execute from
        # /usr/local/bin is because of the 'mounts' section above.
        destination = "local/myapplication.sh"
      }

      resources {
        cpu    = 256
        memory = 128
      }
    }

    task "exec_task" {
      driver = "exec"

      config {
        command = "/bin/sh"
        args    = ["/usr/local/bin/myapplication.sh"]
      }

      # host volumes for exec tasks are more limited, so we're only going to read
      # data that the other task places there
      #
      # - we can't write unless the nobody user has permissions to write there
      # - we can't template into this location because the host_volume mounts
      #   over the template (see https://github.com/hashicorp/nomad/issues/7796)
      volume_mount {
        volume      = "data"
        destination = "/tmp/foo"
        read_only   = true
      }

      template {
        data = <<EOT
#!/bin/sh
sleep 3600
EOT

        # this path is relative to the allocation's task directory:
        # /var/nomad/alloc/:alloc_id/:task_name
        # which is the same as the root directory for exec tasks.
        # we just need to make sure this doesn't collide with the
        # chroot: https://www.nomadproject.io/docs/drivers/exec#chroot
        destination = "usr/local/bin/myapplication.sh"
      }

      resources {
        cpu    = 256
        memory = 128
      }
    }
  }
}

e2e/volumes/volumes.go (new file)

@@ -0,0 +1,116 @@
package volumes

import (
	"fmt"
	"os"

	"github.com/hashicorp/nomad/api"
	e2e "github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/jobspec"
)

type VolumesTest struct {
	framework.TC
	jobIDs []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "Volumes",
		CanRunLocal: true,
		Cases: []framework.TestCase{
			new(VolumesTest),
		},
	})
}

func (tc *VolumesTest) BeforeAll(f *framework.F) {
	e2e.WaitForLeader(f.T(), tc.Nomad())
	e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

func (tc *VolumesTest) AfterEach(f *framework.F) {
	if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
		return
	}

	for _, id := range tc.jobIDs {
		_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
		f.NoError(err)
	}
	tc.jobIDs = []string{}

	_, err := e2e.Command("nomad", "system", "gc")
	f.NoError(err)
}
// TestVolumeMounts exercises host volume and Docker volume functionality for
// the exec and docker task drivers, particularly around mounting locations
// within the container and how this is exposed to the user.
func (tc *VolumesTest) TestVolumeMounts(f *framework.F) {
	jobID := "volumes-" + uuid.Generate()[0:8]
	f.NoError(e2e.Register(jobID, "volumes/input/volumes.nomad"))
	tc.jobIDs = append(tc.jobIDs, jobID)

	expected := []string{"running"}
	f.NoError(e2e.WaitForAllocStatusExpected(jobID, expected), "job should be running")

	allocs, err := e2e.AllocsForJob(jobID)
	f.NoError(err, "could not get allocs for job")
	allocID := allocs[0]["ID"]
	nodeID := allocs[0]["Node ID"]

	cmdToExec := fmt.Sprintf("cat /tmp/foo/%s", allocID)

	out, err := e2e.AllocExec(allocID, "docker_task", cmdToExec, nil)
	f.NoError(err, "could not exec into task: docker_task")
	f.Equal(out, allocID+"\n", "alloc data is missing from docker_task")

	out, err = e2e.AllocExec(allocID, "exec_task", cmdToExec, nil)
	f.NoError(err, "could not exec into task: exec_task")
	f.Equal(out, allocID+"\n", "alloc data is missing from exec_task")

	_, err = e2e.Command("nomad", "job", "stop", jobID)
	f.NoError(err, "could not stop job")

	// modify the job so that we make sure it's placed back on the same host.
	// we want to be able to verify that the data from the previous alloc is
	// still there
	job, err := jobspec.ParseFile("volumes/input/volumes.nomad")
	f.NoError(err)
	job.ID = &jobID
	job.Constraints = []*api.Constraint{
		{
			LTarget: "${node.unique.id}",
			RTarget: nodeID,
			Operand: "=",
		},
	}

	_, _, err = tc.Nomad().Jobs().Register(job, nil)
	f.NoError(err, "could not register updated job")

	allocs, err = e2e.AllocsForJob(jobID)
	f.NoError(err, "could not get allocs for job")
	newAllocID := allocs[0]["ID"]
	newCmdToExec := fmt.Sprintf("cat /tmp/foo/%s", newAllocID)

	out, err = e2e.AllocExec(newAllocID, "docker_task", cmdToExec, nil)
	f.NoError(err, "could not exec into task: docker_task")
	f.Equal(out, allocID+"\n", "previous alloc data is missing from docker_task")

	out, err = e2e.AllocExec(newAllocID, "docker_task", newCmdToExec, nil)
	f.NoError(err, "could not exec into task: docker_task")
	f.Equal(out, newAllocID+"\n", "new alloc data is missing from docker_task")

	out, err = e2e.AllocExec(newAllocID, "exec_task", cmdToExec, nil)
	f.NoError(err, "could not exec into task: exec_task")
	f.Equal(out, allocID+"\n", "previous alloc data is missing from exec_task")

	out, err = e2e.AllocExec(newAllocID, "exec_task", newCmdToExec, nil)
	f.NoError(err, "could not exec into task: exec_task")
	f.Equal(out, newAllocID+"\n", "new alloc data is missing from exec_task")
}