E2E: clarify drain -deadline and -force flag behaviors (#16868)

The `-deadline` and `-force` flags for the `nomad node drain` command only cause
the drain to ignore the `migrate` block's healthy deadline, max parallel, and so
on. These flags have no effect on the `kill_timeout` or `shutdown_delay` options
of the jobspec.

This changeset fixes the skipped E2E tests so that they validate the intended
behavior, and updates the docs for more clarity.
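
To make the distinction concrete, here is a minimal jobspec sketch (the job name
and values are hypothetical and not part of this change) showing which knobs the
drain flags do and do not override:

job "example" {
  group "group" {

    # The migrate block controls the pace of a drain: how many allocations
    # move at once and how long replacements must be healthy. The -deadline
    # flag caps the total time the drain will honor this pacing, and -force
    # ignores it entirely.
    migrate {
      max_parallel     = 1
      min_healthy_time = "30s"
      healthy_deadline = "5m"
    }

    task "task" {
      driver = "docker"

      # kill_timeout and shutdown_delay are unaffected by -deadline and
      # -force: every allocation stopped by the drain still gets its
      # shutdown delay and its kill_timeout before being killed.
      shutdown_delay = "5s"
      kill_timeout   = "30s"

      config {
        image   = "busybox:1"
        command = "/bin/sh"
        args    = ["-c", "sleep 600"]
      }
    }
  }
}

Running `nomad node drain -enable -deadline 1m -yes <node-id>` against a node with
these allocations would stop them well before the migrate block's pacing allows,
but each task would still receive its shutdown delay and kill timeout on the way
down.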
Tim Gross 2023-04-12 15:27:24 -04:00 committed by GitHub
parent ec1a8ae12a
commit 4df2d9bda8
5 changed files with 166 additions and 230 deletions


@@ -24,7 +24,6 @@ import (
 	_ "github.com/hashicorp/nomad/e2e/lifecycle"
 	_ "github.com/hashicorp/nomad/e2e/metrics"
 	_ "github.com/hashicorp/nomad/e2e/networking"
-	_ "github.com/hashicorp/nomad/e2e/nodedrain"
 	_ "github.com/hashicorp/nomad/e2e/nomadexec"
 	_ "github.com/hashicorp/nomad/e2e/oversubscription"
 	_ "github.com/hashicorp/nomad/e2e/parameterized"
@@ -45,6 +44,7 @@ import (
 	// we get a quick check that they compile on every commit
 	_ "github.com/hashicorp/nomad/e2e/disconnectedclients"
 	_ "github.com/hashicorp/nomad/e2e/namespaces"
+	_ "github.com/hashicorp/nomad/e2e/nodedrain"
 	_ "github.com/hashicorp/nomad/e2e/volumes"
 )


@@ -2,40 +2,33 @@
 # SPDX-License-Identifier: MPL-2.0

 job "drain_deadline" {
-  datacenters = ["dc1", "dc2"]

   constraint {
     attribute = "${attr.kernel.name}"
     value     = "linux"
   }

+  migrate {
+    max_parallel     = 1
+    min_healthy_time = "30s"
+  }
+
   group "group" {
+    count = 2
+
     task "task" {
       driver = "docker"
-      kill_timeout = "2m"

       config {
         image   = "busybox:1"
         command = "/bin/sh"
-        args    = ["local/script.sh"]
-      }
-
-      template {
-        data = <<EOF
-#!/bin/sh
-trap 'sleep 60' 2
-sleep 600
-EOF
-        destination = "local/script.sh"
-        change_mode = "noop"
+        args    = ["-c", "sleep 600"]
       }

       resources {
         cpu    = 256
-        memory = 128
+        memory = 64
       }
     }
   }
 }


@@ -30,6 +30,8 @@ func TestNodeDrain(t *testing.T) {
 	t.Run("IgnoreSystem", testIgnoreSystem)
 	t.Run("EphemeralMigrate", testEphemeralMigrate)
 	t.Run("KeepIneligible", testKeepIneligible)
+	t.Run("DeadlineFlag", testDeadlineFlag)
+	t.Run("ForceFlag", testForceFlag)
 }

 // testIgnoreSystem tests that system jobs are left behind when the
@@ -51,12 +53,15 @@ func testIgnoreSystem(t *testing.T) {
 	// Run a system job, which will not be moved when we drain the node
 	systemJobID := "test-node-drain-system-" + uuid.Short()
 	t.Cleanup(cleanupJobState(t, systemJobID))
-	registerAndWaitForRunning(t, nomadClient, systemJobID, "./input/drain_ignore_system.nomad", count)
+	must.NoError(t, e2eutil.Register(systemJobID, "./input/drain_ignore_system.nomad"))
+	waitForRunningAllocs(t, nomadClient, systemJobID, count)

 	// Also run a service job so we can verify when the drain is done
 	serviceJobID := "test-node-drain-service-" + uuid.Short()
 	t.Cleanup(cleanupJobState(t, serviceJobID))
-	serviceAllocs := registerAndWaitForRunning(t, nomadClient, serviceJobID, "./input/drain_simple.nomad", 1)
+	must.NoError(t, e2eutil.Register(serviceJobID, "./input/drain_simple.nomad"))
+	serviceAllocs := waitForRunningAllocs(t, nomadClient, serviceJobID, 1)
 	oldAllocID := serviceAllocs[0].ID
 	oldNodeID := serviceAllocs[0].NodeID
@@ -67,7 +72,8 @@ func testIgnoreSystem(t *testing.T) {
 	must.NoError(t, err, must.Sprintf("expected no error when marking node for drain: %v", out))

 	// The service job should be drained
-	newAllocs := waitForAllocDrain(t, nomadClient, serviceJobID, oldAllocID, oldNodeID)
+	newAllocs := waitForAllocDrainComplete(t, nomadClient, serviceJobID,
+		oldAllocID, oldNodeID, time.Second*120)
 	must.Len(t, 1, newAllocs, must.Sprint("expected 1 new service job alloc"))

 	// The system job should not have been drained
@@ -92,7 +98,8 @@ func testEphemeralMigrate(t *testing.T) {
 	nomadClient := e2eutil.NomadClient(t)
 	jobID := "drain-migrate-" + uuid.Short()

-	allocs := registerAndWaitForRunning(t, nomadClient, jobID, "./input/drain_migrate.nomad", 1)
+	must.NoError(t, e2eutil.Register(jobID, "./input/drain_migrate.nomad"))
+	allocs := waitForRunningAllocs(t, nomadClient, jobID, 1)
 	t.Cleanup(cleanupJobState(t, jobID))
 	oldAllocID := allocs[0].ID
 	oldNodeID := allocs[0].NodeID
@@ -117,7 +124,8 @@ func testEphemeralMigrate(t *testing.T) {
 	out, err := e2eutil.Command("nomad", "node", "drain", "-enable", "-yes", "-detach", oldNodeID)
 	must.NoError(t, err, must.Sprintf("expected no error when marking node for drain: %v", out))

-	newAllocs := waitForAllocDrain(t, nomadClient, jobID, oldAllocID, oldNodeID)
+	newAllocs := waitForAllocDrainComplete(t, nomadClient, jobID,
+		oldAllocID, oldNodeID, time.Second*120)
 	must.Len(t, 1, newAllocs, must.Sprint("expected 1 new alloc"))
 	newAllocID := newAllocs[0].ID
 	newNodeID := newAllocs[0].NodeID
@@ -176,42 +184,129 @@ func testKeepIneligible(t *testing.T) {
 	}
 }

-// registerAndWaitForRunning registers a job and waits for the expected number
-// of allocations to be in a running state. Returns the allocations.
-func registerAndWaitForRunning(t *testing.T, nomadClient *api.Client, jobID, jobSpec string, expectedCount int) []*api.AllocationListStub {
+// testDeadlineFlag tests the enforcement of the node drain deadline so that
+// allocations are moved even if max_parallel says we should be waiting
+func testDeadlineFlag(t *testing.T) {
+	nomadClient := e2eutil.NomadClient(t)
+	t.Cleanup(cleanupDrainState(t))
+
+	jobID := "test-node-drain-" + uuid.Short()
+	must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
+	allocs := waitForRunningAllocs(t, nomadClient, jobID, 2)
+
+	t.Cleanup(cleanupJobState(t, jobID))
+	oldAllocID1 := allocs[0].ID
+	oldNodeID1 := allocs[0].NodeID
+	oldAllocID2 := allocs[1].ID
+	oldNodeID2 := allocs[1].NodeID
+
+	t.Logf("draining nodes %s, %s", oldNodeID1, oldNodeID2)
+	out, err := e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID1)
+	must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+	out, err = e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID2)
+	must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+
+	out, err = e2eutil.Command(
+		"nomad", "node", "drain",
+		"-deadline", "1s",
+		"-enable", "-yes", "-detach", oldNodeID1)
+	must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID1, err, out))
+
+	out, err = e2eutil.Command(
+		"nomad", "node", "drain",
+		"-deadline", "1s",
+		"-enable", "-yes", "-detach", oldNodeID2)
+	must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID2, err, out))
+
+	// with max_parallel=1 and min_healthy_time=30s we'd expect it to take ~60s
+	// for both to be marked complete. Instead, because of the -deadline flag
+	// we'll expect the allocs to be stopped almost immediately (give it 10s to
+	// avoid flakiness), and then the new allocs should come up and get marked
+	// healthy after ~30s
+	t.Log("waiting for old allocs to stop")
+	waitForAllocsStop(t, nomadClient, time.Second*10, oldAllocID1, oldAllocID2)
+
+	t.Log("waiting for running allocs")
+	waitForRunningAllocs(t, nomadClient, jobID, 2)
+}
+
+// testForceFlag tests the enforcement of the node drain -force flag so that
+// allocations are terminated immediately.
+func testForceFlag(t *testing.T) {
+	nomadClient := e2eutil.NomadClient(t)
+	t.Cleanup(cleanupDrainState(t))
+
+	jobID := "test-node-drain-" + uuid.Short()
+	must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
+	allocs := waitForRunningAllocs(t, nomadClient, jobID, 2)
+
+	t.Cleanup(cleanupJobState(t, jobID))
+	oldAllocID1 := allocs[0].ID
+	oldNodeID1 := allocs[0].NodeID
+	oldAllocID2 := allocs[1].ID
+	oldNodeID2 := allocs[1].NodeID
+
+	t.Logf("draining nodes %s, %s", oldNodeID1, oldNodeID2)
+	out, err := e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID1)
+	must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+	out, err = e2eutil.Command("nomad", "node", "eligibility", "-disable", oldNodeID2)
+	must.NoError(t, err, must.Sprintf("nomad node eligibility -disable failed: %v\n%v", err, out))
+
+	out, err = e2eutil.Command(
+		"nomad", "node", "drain", "-force",
+		"-enable", "-yes", "-detach", oldNodeID1)
+	must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID1, err, out))
+
+	out, err = e2eutil.Command(
+		"nomad", "node", "drain", "-force",
+		"-enable", "-yes", "-detach", oldNodeID2)
+	must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", oldNodeID2, err, out))
+
+	// with max_parallel=1 and min_healthy_time=30s we'd expect it to take ~60s
+	// for both to be marked complete. Instead, because of the -force flag
+	// we'll expect the allocs to be stopped almost immediately (give it 10s to
+	// avoid flakiness), and then the new allocs should come up and get marked
+	// healthy after ~30s
+	t.Log("waiting for old allocs to stop")
+	waitForAllocsStop(t, nomadClient, time.Second*10, oldAllocID1, oldAllocID2)
+
+	t.Log("waiting for running allocs")
+	waitForRunningAllocs(t, nomadClient, jobID, 2)
+}
+
+func waitForRunningAllocs(t *testing.T, nomadClient *api.Client, jobID string, expectedRunningCount int) []*api.AllocationListStub {
 	t.Helper()
-	var allocs []*api.AllocationListStub
-	var err error
-
-	must.NoError(t, e2eutil.Register(jobID, jobSpec))
+
+	runningAllocs := set.From([]*api.AllocationListStub{})

 	must.Wait(t, wait.InitialSuccess(
 		wait.ErrorFunc(func() error {
-			allocs, _, err = nomadClient.Jobs().Allocations(jobID, false, nil)
-			if err != nil {
-				return fmt.Errorf("expected no error listing allocs: %v", err)
-			}
-			if len(allocs) != expectedCount {
-				return fmt.Errorf("expected %d allocs but found %d", expectedCount, len(allocs))
-			}
+			allocs, _, err := nomadClient.Jobs().Allocations(jobID, false, nil)
+			must.NoError(t, err)
+
 			for _, alloc := range allocs {
-				if alloc.ClientStatus != structs.AllocClientStatusRunning {
-					return fmt.Errorf("alloc %q was %q, not running", alloc.ID, alloc.ClientStatus)
+				if alloc.ClientStatus == structs.AllocClientStatusRunning {
+					runningAllocs.Insert(alloc)
 				}
 			}
+			if runningAllocs.Size() < expectedRunningCount {
+				return fmt.Errorf("expected %d running allocs, got %d",
+					expectedRunningCount, runningAllocs.Size())
+			}
 			return nil
 		}),
 		wait.Timeout(60*time.Second),
 		wait.Gap(500*time.Millisecond),
 	))
-	return allocs
+	return runningAllocs.Slice()
 }

-// waitForAllocDrain polls the allocation statuses for a job until we've finished
+// waitForAllocDrainComplete polls the allocation statuses for a job until we've finished
 // migrating:
 // - the old alloc should be stopped
 // - the new alloc should be running
-func waitForAllocDrain(t *testing.T, nomadClient *api.Client, jobID, oldAllocID, oldNodeID string) []*api.AllocationListStub {
+func waitForAllocDrainComplete(t *testing.T, nomadClient *api.Client, jobID, oldAllocID, oldNodeID string, deadline time.Duration) []*api.AllocationListStub {
 	t.Helper()

 	newAllocs := set.From([]*api.AllocationListStub{})
@@ -243,16 +338,45 @@ func waitForAllocDrain(t *testing.T, nomadClient *api.Client, jobID, oldAllocID,
 				oldNodeID[:8], time.Now().Sub(start))
 			return nil
 		}),
-		wait.Timeout(120*time.Second),
+		wait.Timeout(deadline),
 		wait.Gap(500*time.Millisecond),
 	))
 	return newAllocs.Slice()
 }
+
+// waitForAllocsStop polls the allocation statuses for specific allocations
+// until they've stopped
+func waitForAllocsStop(t *testing.T, nomadClient *api.Client, deadline time.Duration, oldAllocIDs ...string) {
+	t.Helper()
+
+	must.Wait(t, wait.InitialSuccess(
+		wait.ErrorFunc(func() error {
+			for _, allocID := range oldAllocIDs {
+				alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
+				must.NoError(t, err)
+				if alloc.ClientStatus != structs.AllocClientStatusComplete {
+					return fmt.Errorf("expected alloc %s to be complete, got %q",
+						allocID[:8], alloc.ClientStatus)
+				}
+			}
+			return nil
+		}),
+		wait.Timeout(deadline),
+		wait.Gap(500*time.Millisecond),
+	))
+}

 func cleanupJobState(t *testing.T, jobID string) func() {
 	return func() {
-		_, err := e2eutil.Command("nomad", "job", "stop", "-purge", jobID)
+		if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
+			return
+		}
+
+		// we can't use the CLI here because some tests will stop the job during
+		// a running deployment, which returns a non-zero exit code
+		nomadClient := e2eutil.NomadClient(t)
+		_, _, err := nomadClient.Jobs().Deregister(jobID, true, nil)
 		test.NoError(t, err)
 	}
 }


@@ -1,183 +0,0 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package nodedrain
import (
"fmt"
"os"
"time"
e2e "github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/testutil"
)
const ns = ""
type NodeDrainE2ETest struct {
framework.TC
jobIDs []string
nodeIDs []string
}
func init() {
framework.AddSuites(&framework.TestSuite{
Component: "NodeDrain",
CanRunLocal: true,
Consul: true,
Cases: []framework.TestCase{
new(NodeDrainE2ETest),
},
})
}
func (tc *NodeDrainE2ETest) BeforeAll(f *framework.F) {
e2e.WaitForLeader(f.T(), tc.Nomad())
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 2) // needs at least 2 to test migration
}
func (tc *NodeDrainE2ETest) AfterEach(f *framework.F) {
if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
return
}
for _, id := range tc.jobIDs {
_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
f.Assert().NoError(err)
}
tc.jobIDs = []string{}
for _, id := range tc.nodeIDs {
_, err := e2e.Command("nomad", "node", "drain", "-disable", "-yes", id)
f.Assert().NoError(err)
_, err = e2e.Command("nomad", "node", "eligibility", "-enable", id)
f.Assert().NoError(err)
}
tc.nodeIDs = []string{}
_, err := e2e.Command("nomad", "system", "gc")
f.Assert().NoError(err)
}
func nodesForJob(jobID string) ([]string, error) {
allocs, err := e2e.AllocsForJob(jobID, ns)
if err != nil {
return nil, err
}
if len(allocs) < 1 {
return nil, fmt.Errorf("no allocs found for job: %v", jobID)
}
nodes := []string{}
for _, alloc := range allocs {
nodes = append(nodes, alloc["Node ID"])
}
return nodes, nil
}
// waitForNodeDrain is a convenience wrapper that polls 'node status'
// until the comparison function over the state of the job's allocs on that
// node returns true
func waitForNodeDrain(nodeID string, comparison func([]map[string]string) bool, wc *e2e.WaitConfig) error {
var got []map[string]string
var err error
interval, retries := wc.OrDefault()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
got, err = e2e.AllocsForNode(nodeID)
if err != nil {
return false, err
}
return comparison(got), nil
}, func(e error) {
err = fmt.Errorf("node drain status check failed: %v\n%#v", e, got)
})
return err
}
// TestNodeDrainDeadline tests the enforcement of the node drain deadline so
// that allocations are terminated even if they haven't gracefully exited.
func (tc *NodeDrainE2ETest) TestNodeDrainDeadline(f *framework.F) {
f.T().Skip("The behavior is unclear and test assertions don't capture intent. Issue 9902")
jobID := "test-node-drain-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "nodedrain/input/drain_deadline.nomad"))
tc.jobIDs = append(tc.jobIDs, jobID)
expected := []string{"running"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")
nodes, err := nodesForJob(jobID)
f.NoError(err, "could not get nodes for job")
f.Len(nodes, 1, "could not get nodes for job")
nodeID := nodes[0]
f.T().Logf("draining node %v", nodeID)
out, err := e2e.Command(
"nomad", "node", "drain",
"-deadline", "5s",
"-enable", "-yes", "-detach", nodeID)
f.NoError(err, fmt.Sprintf("'nomad node drain %v' failed: %v\n%v", nodeID, err, out))
tc.nodeIDs = append(tc.nodeIDs, nodeID)
// the deadline is 40s but we can't guarantee its instantly terminated at
// that point, so we give it 30s which is well under the 2m kill_timeout in
// the job.
// deadline here needs to account for scheduling and propagation delays.
f.NoError(waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drain job alloc specifically. test
// may pass if client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
), "node did not drain immediately following deadline")
}
// TestNodeDrainForce tests the enforcement of the node drain -force flag so
// that allocations are terminated immediately.
func (tc *NodeDrainE2ETest) TestNodeDrainForce(f *framework.F) {
f.T().Skip("The behavior is unclear and test assertions don't capture intent. Issue 9902")
jobID := "test-node-drain-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "nodedrain/input/drain_deadline.nomad"))
tc.jobIDs = append(tc.jobIDs, jobID)
expected := []string{"running"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")
nodes, err := nodesForJob(jobID)
f.NoError(err, "could not get nodes for job")
f.Len(nodes, 1, "could not get nodes for job")
nodeID := nodes[0]
out, err := e2e.Command(
"nomad", "node", "drain",
"-force",
"-enable", "-yes", "-detach", nodeID)
f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
tc.nodeIDs = append(tc.nodeIDs, nodeID)
// we've passed -force but we can't guarantee its instantly terminated at
// that point, so we give it 30s which is under the 2m kill_timeout in
// the job
f.NoError(waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drain job alloc specifically. test
// may pass if client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2e.WaitConfig{Interval: time.Second, Retries: 40},
), "node did not drain immediately when forced")
}


@@ -57,17 +57,19 @@ capability.
 - `-disable`: Disable node drain mode.

 - `-deadline`: Set the deadline by which all allocations must be moved off the
-  node. Remaining allocations after the deadline are force removed from the
-  node. Defaults to 1 hour.
+  node. Remaining allocations after the deadline are removed from the node,
+  regardless of their [`migrate`][] block. Defaults to 1 hour.

 - `-detach`: Return immediately instead of entering monitor mode.

 - `-monitor`: Enter monitor mode directly without modifying the drain status.

-- `-force`: Force remove allocations off the node immediately.
+- `-force`: Remove allocations off the node immediately, regardless of the
+  allocation's [`migrate`][] block.

-- `-no-deadline`: No deadline allows the allocations to drain off the node
-  without being force stopped after a certain deadline.
+- `-no-deadline`: No deadline allows the allocations to drain off the node,
+  ignoring the default 1 hour deadline before allocations are removed regardless
+  of their [`migrate`][] block.

 - `-ignore-system`: Ignore system allows the drain to complete without
   stopping system job allocations. By default system jobs (and CSI