E2E: clarify drain `-deadline` and `-force` flag behaviors (#16868)

The `-deadline` and `-force` flags for the `nomad node drain` command only cause
the draining to ignore the `migrate` block's healthy deadline, max parallel,
etc. These flags have nothing to do with the `kill_timeout` or
`shutdown_delay` options of the jobspec.
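
As an illustration (the node ID below is a placeholder), the following drain
abandons the `migrate` block's pacing after one minute, but any allocations
stopped at that point still get their `kill_timeout`:

    nomad node drain -enable -deadline 1m -yes -detach c3ab5c12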

This changeset fixes the skipped E2E tests so that they validate the intended
behavior, and updates the docs for more clarity.
Tim Gross 2023-04-12 15:27:24 -04:00 committed by GitHub
parent ec1a8ae12a
commit 4df2d9bda8
5 changed files with 166 additions and 230 deletions


@@ -24,7 +24,6 @@ import (
_ "github.com/hashicorp/nomad/e2e/lifecycle"
_ "github.com/hashicorp/nomad/e2e/metrics"
_ "github.com/hashicorp/nomad/e2e/networking"
_ "github.com/hashicorp/nomad/e2e/nodedrain"
_ "github.com/hashicorp/nomad/e2e/nomadexec"
_ "github.com/hashicorp/nomad/e2e/oversubscription"
_ "github.com/hashicorp/nomad/e2e/parameterized"
@@ -45,6 +44,7 @@ import (
// we get a quick check that they compile on every commit
_ "github.com/hashicorp/nomad/e2e/disconnectedclients"
_ "github.com/hashicorp/nomad/e2e/namespaces"
_ "github.com/hashicorp/nomad/e2e/nodedrain"
_ "github.com/hashicorp/nomad/e2e/volumes"
)


@@ -2,40 +2,33 @@
# SPDX-License-Identifier: MPL-2.0
job "drain_deadline" {
datacenters = ["dc1", "dc2"]
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}
+migrate {
+max_parallel = 1
+min_healthy_time = "30s"
+}
group "group" {
+count = 2
task "task" {
driver = "docker"
-kill_timeout = "2m"
config {
image = "busybox:1"
command = "/bin/sh"
args = ["local/script.sh"]
}
template {
data = <<EOF
#!/bin/sh
trap 'sleep 60' 2
sleep 600
EOF
destination = "local/script.sh"
change_mode = "noop"
args = ["-c", "sleep 600"]
}
resources {
cpu = 256
-memory = 128
+memory = 64
}
}
}
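
With this `migrate` block, an unconstrained drain migrates one allocation at a
time (`max_parallel = 1`) and waits for each replacement to be healthy for 30s,
so moving both allocations takes roughly a minute; the tests rely on
`-deadline` and `-force` to cut that short. To watch the pacing by hand,
something like this should work (node ID is a placeholder):

    nomad node drain -monitor c3ab5c12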


@@ -30,6 +30,8 @@ func TestNodeDrain(t *testing.T) {
t.Run("IgnoreSystem", testIgnoreSystem)
t.Run("EphemeralMigrate", testEphemeralMigrate)
t.Run("KeepIneligible", testKeepIneligible)
t.Run("DeadlineFlag", testDeadlineFlag)
t.Run("ForceFlag", testForceFlag)
}
// testIgnoreSystem tests that system jobs are left behind when the
@@ -51,12 +53,15 @@ func testIgnoreSystem(t *testing.T) {
// Run a system job, which will not be moved when we drain the node
systemJobID := "test-node-drain-system-" + uuid.Short()
t.Cleanup(cleanupJobState(t, systemJobID))
-registerAndWaitForRunning(t, nomadClient, systemJobID, "./input/drain_ignore_system.nomad", count)
+must.NoError(t, e2eutil.Register(systemJobID, "./input/drain_ignore_system.nomad"))
+waitForRunningAllocs(t, nomadClient, systemJobID, count)
// Also run a service job so we can verify when the drain is done
serviceJobID := "test-node-drain-service-" + uuid.Short()
t.Cleanup(cleanupJobState(t, serviceJobID))
-serviceAllocs := registerAndWaitForRunning(t, nomadClient, serviceJobID, "./input/drain_simple.nomad", 1)
+must.NoError(t, e2eutil.Register(serviceJobID, "./input/drain_simple.nomad"))
+serviceAllocs := waitForRunningAllocs(t, nomadClient, serviceJobID, 1)
oldAllocID := serviceAllocs[0].ID
oldNodeID := serviceAllocs[0].NodeID
@@ -67,7 +72,8 @@ func testIgnoreSystem(t *testing.T) {
must.NoError(t, err, must.Sprintf("expected no error when marking node for drain: %v", out))
// The service job should be drained
-newAllocs := waitForAllocDrain(t, nomadClient, serviceJobID, oldAllocID, oldNodeID)
+newAllocs := waitForAllocDrainComplete(t, nomadClient, serviceJobID,
+oldAllocID, oldNodeID, time.Second*120)
must.Len(t, 1, newAllocs, must.Sprint("expected 1 new service job alloc"))
// The system job should not have been drained
@@ -92,7 +98,8 @@ func testEphemeralMigrate(t *testing.T) {
nomadClient := e2eutil.NomadClient(t)
jobID := "drain-migrate-" + uuid.Short()
-allocs := registerAndWaitForRunning(t, nomadClient, jobID, "./input/drain_migrate.nomad", 1)
+must.NoError(t, e2eutil.Register(jobID, "./input/drain_migrate.nomad"))
+allocs := waitForRunningAllocs(t, nomadClient, jobID, 1)
t.Cleanup(cleanupJobState(t, jobID))
oldAllocID := allocs[0].ID
oldNodeID := allocs[0].NodeID
@@ -117,7 +124,8 @@ func testEphemeralMigrate(t *testing.T) {
out, err := e2eutil.Command("nomad", "node", "drain", "-enable", "-yes", "-detach", oldNodeID)
must.NoError(t, err, must.Sprintf("expected no error when marking node for drain: %v", out))
-newAllocs := waitForAllocDrain(t, nomadClient, jobID, oldAllocID, oldNodeID)
+newAllocs := waitForAllocDrainComplete(t, nomadClient, jobID,
+oldAllocID, oldNodeID, time.Second*120)
must.Len(t, 1, newAllocs, must.Sprint("expected 1 new alloc"))
newAllocID := newAllocs[0].ID
newNodeID := newAllocs[0].NodeID
@@ -176,42 +184,129 @@ func testKeepIneligible(t *testing.T) {
}
}
-// registerAndWaitForRunning registers a job and waits for the expected number
-// of allocations to be in a running state. Returns the allocations.
-func registerAndWaitForRunning(t *testing.T, nomadClient *api.Client, jobID, jobSpec string, expectedCount int) []*api.AllocationListStub {
+// testDeadlineFlag tests the enforcement of the node drain deadline so that
+// allocations are moved even if max_parallel says we should be waiting
+func testDeadlineFlag(t *testing.T) {
+nomadClient := e2eutil.NomadClient(t)
+t.Cleanup(cleanupDrainState(t))
+jobID := "test-node-drain-" + uuid.Short()
+must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
+allocs := waitForRunningAllocs(t, nomadClient, jobID, 2)
+t.Cleanup(cleanupJobState(t, jobID))
+oldAllocID1 := allocs[0].ID
+oldNodeID1 := allocs[0].NodeID
+oldAllocID2 := allocs[1].ID
+oldNodeID2 := allocs[1].NodeID
+t.Logf("draining nodes %s, %s", oldNodeID1, oldNodeID2)
+out, err := e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID1)
+must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+out, err = e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID2)
+must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+out, err = e2eutil.Command(
+"nomad", "node", "drain",
+"-deadline", "1s",
+"-enable", "-yes", "-detach", oldNodeID1)
+must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID1, err, out))
+out, err = e2eutil.Command(
+"nomad", "node", "drain",
+"-deadline", "1s",
+"-enable", "-yes", "-detach", oldNodeID2)
+must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID2, err, out))
+// with max_parallel=1 and min_healthy_time=30s we'd expect it to take ~60s
+// for both to be marked complete. Instead, because of the -deadline flag
+// we'll expect the allocs to be stopped almost immediately (give it 10s to
+// avoid flakiness), and then the new allocs should come up and get marked
+// healthy after ~30s
+t.Log("waiting for old allocs to stop")
+waitForAllocsStop(t, nomadClient, time.Second*10, oldAllocID1, oldAllocID2)
+t.Log("waiting for running allocs")
+waitForRunningAllocs(t, nomadClient, jobID, 2)
+}
+// testForceFlag tests the enforcement of the node drain -force flag so that
+// allocations are terminated immediately.
+func testForceFlag(t *testing.T) {
+nomadClient := e2eutil.NomadClient(t)
+t.Cleanup(cleanupDrainState(t))
+jobID := "test-node-drain-" + uuid.Short()
+must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
+allocs := waitForRunningAllocs(t, nomadClient, jobID, 2)
+t.Cleanup(cleanupJobState(t, jobID))
+oldAllocID1 := allocs[0].ID
+oldNodeID1 := allocs[0].NodeID
+oldAllocID2 := allocs[1].ID
+oldNodeID2 := allocs[1].NodeID
+t.Logf("draining nodes %s, %s", oldNodeID1, oldNodeID2)
+out, err := e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID1)
+must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+out, err = e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID2)
+must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+out, err = e2eutil.Command(
+"nomad", "node", "drain", "-force",
+"-enable", "-yes", "-detach", oldNodeID1)
+must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID1, err, out))
+out, err = e2eutil.Command(
+"nomad", "node", "drain", "-force",
+"-enable", "-yes", "-detach", oldNodeID2)
+must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID2, err, out))
+// with max_parallel=1 and min_healthy_time=30s we'd expect it to take ~60s
+// for both to be marked complete. Instead, because of the -force flag
+// we'll expect the allocs to be stopped almost immediately (give it 10s to
+// avoid flakiness), and then the new allocs should come up and get marked
+// healthy after ~30s
+t.Log("waiting for old allocs to stop")
+waitForAllocsStop(t, nomadClient, time.Second*10, oldAllocID1, oldAllocID2)
+t.Log("waiting for running allocs")
+waitForRunningAllocs(t, nomadClient, jobID, 2)
+}
+func waitForRunningAllocs(t *testing.T, nomadClient *api.Client, jobID string, expectedRunningCount int) []*api.AllocationListStub {
t.Helper()
-var allocs []*api.AllocationListStub
-var err error
-must.NoError(t, e2eutil.Register(jobID, jobSpec))
+runningAllocs := set.From([]*api.AllocationListStub{})
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(func() error {
-allocs, _, err = nomadClient.Jobs().Allocations(jobID, false, nil)
-if err != nil {
-return fmt.Errorf("expected no error listing allocs: %v", err)
-}
-if len(allocs) != expectedCount {
-return fmt.Errorf("expected %d allocs but found %d", expectedCount, len(allocs))
-}
+allocs, _, err := nomadClient.Jobs().Allocations(jobID, false, nil)
+must.NoError(t, err)
for _, alloc := range allocs {
-if alloc.ClientStatus != structs.AllocClientStatusRunning {
-return fmt.Errorf("alloc %q was %q, not running", alloc.ID, alloc.ClientStatus)
+if alloc.ClientStatus == structs.AllocClientStatusRunning {
+runningAllocs.Insert(alloc)
}
}
+if runningAllocs.Size() < expectedRunningCount {
+return fmt.Errorf("expected %d running allocs, got %d", expectedRunningCount, runningAllocs.Size())
+}
return nil
}),
wait.Timeout(60*time.Second),
wait.Gap(500*time.Millisecond),
))
-return allocs
+return runningAllocs.Slice()
}
-// waitForAllocDrain polls the allocation statuses for a job until we've finished
+// waitForAllocDrainComplete polls the allocation statuses for a job until we've finished
// migrating:
// - the old alloc should be stopped
// - the new alloc should be running
-func waitForAllocDrain(t *testing.T, nomadClient *api.Client, jobID, oldAllocID, oldNodeID string) []*api.AllocationListStub {
+func waitForAllocDrainComplete(t *testing.T, nomadClient *api.Client, jobID, oldAllocID, oldNodeID string, deadline time.Duration) []*api.AllocationListStub {
t.Helper()
newAllocs := set.From([]*api.AllocationListStub{})
@@ -243,16 +338,45 @@ func waitForAllocDrain(t *testing.T, nomadClient *api.Client, jobID, oldAllocID,
oldNodeID[:8], time.Now().Sub(start))
return nil
}),
-wait.Timeout(120*time.Second),
+wait.Timeout(deadline),
wait.Gap(500*time.Millisecond),
))
return newAllocs.Slice()
}
+// waitForAllocsStop polls the allocation statuses for specific allocations until
+// they've stopped
+func waitForAllocsStop(t *testing.T, nomadClient *api.Client, deadline time.Duration, oldAllocIDs ...string) {
+t.Helper()
+must.Wait(t, wait.InitialSuccess(
+wait.ErrorFunc(func() error {
+for _, allocID := range oldAllocIDs {
+alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
+must.NoError(t, err)
+if alloc.ClientStatus != structs.AllocClientStatusComplete {
+return fmt.Errorf("expected alloc %s to be complete, got %q",
+allocID[:8], alloc.ClientStatus)
+}
+}
+return nil
+}),
+wait.Timeout(deadline),
+wait.Gap(500*time.Millisecond),
+))
+}
func cleanupJobState(t *testing.T, jobID string) func() {
return func() {
-_, err := e2eutil.Command("nomad", "job", "stop", "-purge", jobID)
+if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
+return
+}
+// we can't use the CLI here because some tests will stop the job during
+// a running deployment, which returns a non-zero exit code
+nomadClient := e2eutil.NomadClient(t)
+_, _, err := nomadClient.Jobs().Deregister(jobID, true, nil)
+test.NoError(t, err)
}
}
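
Outside the test harness, the drains these tests perform correspond roughly to
the following CLI invocations (node IDs are placeholders):

    # -deadline: the migrate block applies until the deadline expires, then
    # any allocations remaining on the node are stopped
    nomad node drain -enable -deadline 1s -yes -detach c3ab5c12

    # -force: all allocations on the node are stopped immediately
    nomad node drain -enable -force -yes -detach c3ab5c12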


@@ -1,183 +0,0 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package nodedrain
import (
"fmt"
"os"
"time"
e2e "github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/testutil"
)
const ns = ""
type NodeDrainE2ETest struct {
framework.TC
jobIDs []string
nodeIDs []string
}
func init() {
framework.AddSuites(&framework.TestSuite{
Component: "NodeDrain",
CanRunLocal: true,
Consul: true,
Cases: []framework.TestCase{
new(NodeDrainE2ETest),
},
})
}
func (tc *NodeDrainE2ETest) BeforeAll(f *framework.F) {
e2e.WaitForLeader(f.T(), tc.Nomad())
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 2) // needs at least 2 to test migration
}
func (tc *NodeDrainE2ETest) AfterEach(f *framework.F) {
if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
return
}
for _, id := range tc.jobIDs {
_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
f.Assert().NoError(err)
}
tc.jobIDs = []string{}
for _, id := range tc.nodeIDs {
_, err := e2e.Command("nomad", "node", "drain", "-disable", "-yes", id)
f.Assert().NoError(err)
_, err = e2e.Command("nomad", "node", "eligibility", "-enable", id)
f.Assert().NoError(err)
}
tc.nodeIDs = []string{}
_, err := e2e.Command("nomad", "system", "gc")
f.Assert().NoError(err)
}
func nodesForJob(jobID string) ([]string, error) {
allocs, err := e2e.AllocsForJob(jobID, ns)
if err != nil {
return nil, err
}
if len(allocs) < 1 {
return nil, fmt.Errorf("no allocs found for job: %v", jobID)
}
nodes := []string{}
for _, alloc := range allocs {
nodes = append(nodes, alloc["Node ID"])
}
return nodes, nil
}
// waitForNodeDrain is a convenience wrapper that polls 'node status'
// until the comparison function over the state of the job's allocs on that
// node returns true
func waitForNodeDrain(nodeID string, comparison func([]map[string]string) bool, wc *e2e.WaitConfig) error {
var got []map[string]string
var err error
interval, retries := wc.OrDefault()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
got, err = e2e.AllocsForNode(nodeID)
if err != nil {
return false, err
}
return comparison(got), nil
}, func(e error) {
err = fmt.Errorf("node drain status check failed: %v\n%#v", e, got)
})
return err
}
// TestNodeDrainDeadline tests the enforcement of the node drain deadline so
// that allocations are terminated even if they haven't gracefully exited.
func (tc *NodeDrainE2ETest) TestNodeDrainDeadline(f *framework.F) {
f.T().Skip("The behavior is unclear and test assertions don't capture intent. Issue 9902")
jobID := "test-node-drain-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "nodedrain/input/drain_deadline.nomad"))
tc.jobIDs = append(tc.jobIDs, jobID)
expected := []string{"running"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")
nodes, err := nodesForJob(jobID)
f.NoError(err, "could not get nodes for job")
f.Len(nodes, 1, "could not get nodes for job")
nodeID := nodes[0]
f.T().Logf("draining node %v", nodeID)
out, err := e2e.Command(
"nomad", "node", "drain",
"-deadline", "5s",
"-enable", "-yes", "-detach", nodeID)
f.NoError(err, fmt.Sprintf("'nomad node drain %v' failed: %v\n%v", nodeID, err, out))
tc.nodeIDs = append(tc.nodeIDs, nodeID)
// the deadline is 40s but we can't guarantee it's instantly terminated at
// that point, so we give it 30s which is well under the 2m kill_timeout in
// the job.
// deadline here needs to account for scheduling and propagation delays.
f.NoError(waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drain job alloc specifically. test
// may pass if client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
), "node did not drain immediately following deadline")
}
// TestNodeDrainForce tests the enforcement of the node drain -force flag so
// that allocations are terminated immediately.
func (tc *NodeDrainE2ETest) TestNodeDrainForce(f *framework.F) {
f.T().Skip("The behavior is unclear and test assertions don't capture intent. Issue 9902")
jobID := "test-node-drain-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "nodedrain/input/drain_deadline.nomad"))
tc.jobIDs = append(tc.jobIDs, jobID)
expected := []string{"running"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")
nodes, err := nodesForJob(jobID)
f.NoError(err, "could not get nodes for job")
f.Len(nodes, 1, "could not get nodes for job")
nodeID := nodes[0]
out, err := e2e.Command(
"nomad", "node", "drain",
"-force",
"-enable", "-yes", "-detach", nodeID)
f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
tc.nodeIDs = append(tc.nodeIDs, nodeID)
// we've passed -force but we can't guarantee it's instantly terminated at
// that point, so we give it 30s which is under the 2m kill_timeout in
// the job
f.NoError(waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drain job alloc specifically. test
// may pass if client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
), "node did not drain immediately when forced")
}


@@ -57,17 +57,19 @@ capability.
 - `-disable`: Disable node drain mode.
 - `-deadline`: Set the deadline by which all allocations must be moved off the
-  node. Remaining allocations after the deadline are force removed from the
-  node. Defaults to 1 hour.
+  node. Remaining allocations after the deadline are removed from the node,
+  regardless of their [`migrate`][] block. Defaults to 1 hour.
 - `-detach`: Return immediately instead of entering monitor mode.
 - `-monitor`: Enter monitor mode directly without modifying the drain status.
-- `-force`: Force remove allocations off the node immediately.
+- `-force`: Remove allocations off the node immediately, regardless of the
+  allocation's [`migrate`][] block.
-- `-no-deadline`: No deadline allows the allocations to drain off the node
-  without being force stopped after a certain deadline.
+- `-no-deadline`: No deadline allows the allocations to drain off the node,
+  ignoring the default 1 hour deadline before allocations are removed regardless
+  of their [`migrate`][] block.
 - `-ignore-system`: Ignore system allows the drain to complete without
   stopping system job allocations. By default system jobs (and CSI