2018-03-08 23:08:23 +00:00
|
|
|
package drainer
|
2018-03-03 01:15:38 +00:00
|
|
|
|
|
|
|
import (
|
2023-03-23 18:07:09 +00:00
|
|
|
"fmt"
|
2018-03-03 01:15:38 +00:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
"github.com/shoenig/test/must"
|
|
|
|
"github.com/shoenig/test/wait"
|
|
|
|
|
2022-03-15 12:42:43 +00:00
|
|
|
"github.com/hashicorp/nomad/ci"
|
2023-03-23 18:07:09 +00:00
|
|
|
"github.com/hashicorp/nomad/helper/pointer"
|
|
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
2018-03-03 01:15:38 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
|
|
"github.com/hashicorp/nomad/nomad/state"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
)
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// TestNodeDrainWatcher_AddNodes tests that new nodes are added to the node
|
|
|
|
// watcher and deadline notifier, but only if they have a drain spec.
|
|
|
|
func TestNodeDrainWatcher_AddNodes(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2023-03-23 18:07:09 +00:00
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
2018-03-03 01:15:38 +00:00
|
|
|
|
|
|
|
// Create two nodes, one draining and one not draining
|
|
|
|
n1, n2 := mock.Node(), mock.Node()
|
|
|
|
n2.DrainStrategy = &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{
|
|
|
|
Deadline: time.Hour,
|
|
|
|
},
|
|
|
|
ForceDeadline: time.Now().Add(time.Hour),
|
|
|
|
}
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Create a job with a running alloc on each node
|
|
|
|
job := mock.Job()
|
|
|
|
jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
|
|
|
|
must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, 101, job))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
alloc1 := mock.Alloc()
|
|
|
|
alloc1.JobID = job.ID
|
|
|
|
alloc1.Job = job
|
|
|
|
alloc1.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
alloc1.NodeID = n1.ID
|
|
|
|
alloc1.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
|
|
|
|
alloc2 := alloc1.Copy()
|
|
|
|
alloc2.ID = uuid.Generate()
|
|
|
|
alloc2.NodeID = n2.ID
|
|
|
|
|
|
|
|
must.NoError(t, store.UpsertAllocs(
|
|
|
|
structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1, alloc2}))
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 103, n1))
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 104, n2))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Only 1 node is draining, and the other should not be tracked
|
|
|
|
assertTrackerSettled(t, tracker, []string{n2.ID})
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Notifications should fire to the job watcher and deadline notifier
|
|
|
|
must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID)
|
|
|
|
must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n2.ID)
|
2018-03-03 01:15:38 +00:00
|
|
|
}
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// TestNodeDrainWatcher_Remove tests that when a node should no longer be
|
|
|
|
// tracked that we stop tracking it in the node watcher and deadline notifier.
|
2018-03-03 01:15:38 +00:00
|
|
|
func TestNodeDrainWatcher_Remove(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2023-03-23 18:07:09 +00:00
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
t.Run("stop drain", func(t *testing.T) {
|
|
|
|
n, _ := testNodeDrainWatcherSetup(t, store, tracker)
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
index, _ := store.LatestIndex()
|
|
|
|
must.NoError(t, store.UpdateNodeDrain(
|
|
|
|
structs.MsgTypeTestSetup, index+1, n.ID, nil, false, 0, nil, nil, ""))
|
|
|
|
|
|
|
|
// Node with stopped drain should no longer be tracked
|
|
|
|
assertTrackerSettled(t, tracker, []string{})
|
|
|
|
must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
|
2018-03-03 01:15:38 +00:00
|
|
|
})
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
t.Run("delete node", func(t *testing.T) {
|
|
|
|
n, _ := testNodeDrainWatcherSetup(t, store, tracker)
|
|
|
|
index, _ := store.LatestIndex()
|
|
|
|
index++
|
|
|
|
must.NoError(t, store.DeleteNode(structs.MsgTypeTestSetup, index, []string{n.ID}))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Node with stopped drain should no longer be tracked
|
|
|
|
assertTrackerSettled(t, tracker, []string{})
|
|
|
|
must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
|
2018-03-03 01:15:38 +00:00
|
|
|
})
|
2023-03-23 18:07:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TestNodeDrainWatcher_NoRemove tests that when the node status changes to
|
|
|
|
// down/disconnected that we don't remove it from the node watcher or deadline
|
|
|
|
// notifier
|
|
|
|
func TestNodeDrainWatcher_NoRemove(t *testing.T) {
|
|
|
|
ci.Parallel(t)
|
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
|
|
|
n, _ := testNodeDrainWatcherSetup(t, store, tracker)
|
|
|
|
|
|
|
|
index, _ := store.LatestIndex()
|
|
|
|
n = n.Copy()
|
|
|
|
n.Status = structs.NodeStatusDisconnected
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n))
|
|
|
|
|
|
|
|
assertTrackerSettled(t, tracker, []string{n.ID})
|
|
|
|
must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID)
|
|
|
|
|
|
|
|
index, _ = store.LatestIndex()
|
|
|
|
n = n.Copy()
|
|
|
|
n.Status = structs.NodeStatusDown
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n))
|
|
|
|
|
|
|
|
assertTrackerSettled(t, tracker, []string{n.ID})
|
|
|
|
must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestNodeDrainWatcher_Update_Spec tests drain spec updates emit events to the
|
|
|
|
// node watcher and deadline notifier.
|
|
|
|
func TestNodeDrainWatcher_Update_Spec(t *testing.T) {
|
|
|
|
ci.Parallel(t)
|
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
|
|
|
n, _ := testNodeDrainWatcherSetup(t, store, tracker)
|
|
|
|
|
|
|
|
// Update the spec to extend the deadline
|
|
|
|
strategy := n.DrainStrategy.Copy()
|
|
|
|
strategy.DrainSpec.Deadline += time.Hour
|
|
|
|
index, _ := store.LatestIndex()
|
|
|
|
must.NoError(t, store.UpdateNodeDrain(
|
|
|
|
structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(),
|
|
|
|
&structs.NodeEvent{}, map[string]string{}, "",
|
|
|
|
))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// We should see a new event
|
|
|
|
assertTrackerSettled(t, tracker, []string{n.ID})
|
|
|
|
|
|
|
|
// Update the spec to have an infinite deadline
|
|
|
|
strategy = strategy.Copy()
|
|
|
|
strategy.DrainSpec.Deadline = 0
|
|
|
|
|
|
|
|
index, _ = store.LatestIndex()
|
|
|
|
must.NoError(t, store.UpdateNodeDrain(
|
|
|
|
structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(),
|
|
|
|
&structs.NodeEvent{}, map[string]string{}, "",
|
|
|
|
))
|
|
|
|
|
|
|
|
// We should see a new event and the node should still be tracked but no
|
|
|
|
// longer in the deadline notifier
|
|
|
|
assertTrackerSettled(t, tracker, []string{n.ID})
|
|
|
|
must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// TestNodeDrainWatcher_Update_IsDone tests that a node drain without allocs
|
|
|
|
// immediately gets unmarked as draining, and that we unset drain if an operator
|
|
|
|
// drains a node with nothing on it.
|
|
|
|
func TestNodeDrainWatcher_Update_IsDone(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2023-03-23 18:07:09 +00:00
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
|
|
|
// Create a draining node
|
|
|
|
n := mock.Node()
|
2023-03-23 18:07:09 +00:00
|
|
|
strategy := &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{Deadline: time.Hour},
|
2018-03-06 18:12:17 +00:00
|
|
|
ForceDeadline: time.Now().Add(time.Hour),
|
|
|
|
}
|
2023-03-23 18:07:09 +00:00
|
|
|
n.DrainStrategy = strategy
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 100, n))
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// There are no jobs on this node so the drain should immediately
|
|
|
|
// complete. we should no longer be tracking the node and its drain strategy
|
|
|
|
// should be cleared
|
|
|
|
assertTrackerSettled(t, tracker, []string{})
|
|
|
|
must.MapEmpty(t, tracker.jobWatcher.(*MockJobWatcher).jobs)
|
|
|
|
must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
|
|
|
|
n, _ = store.NodeByID(nil, n.ID)
|
|
|
|
must.Nil(t, n.DrainStrategy)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestNodeDrainWatcher_Update_DrainComplete tests that allocation updates that
|
|
|
|
// complete the drain emits events to the node watcher and deadline notifier.
|
|
|
|
func TestNodeDrainWatcher_Update_DrainComplete(t *testing.T) {
|
|
|
|
ci.Parallel(t)
|
|
|
|
_, store, tracker := testNodeDrainWatcher(t)
|
|
|
|
n, _ := testNodeDrainWatcherSetup(t, store, tracker)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Simulate event: an alloc is terminal so DrainingJobWatcher.Migrated
|
|
|
|
// channel updates NodeDrainer, which updates Raft
|
|
|
|
_, err := tracker.raft.NodesDrainComplete([]string{n.ID},
|
|
|
|
structs.NewNodeEvent().
|
|
|
|
SetSubsystem(structs.NodeEventSubsystemDrain).
|
|
|
|
SetMessage(NodeDrainEventComplete))
|
|
|
|
must.NoError(t, err)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
assertTrackerSettled(t, tracker, []string{})
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
n, _ = store.NodeByID(nil, n.ID)
|
|
|
|
must.Nil(t, n.DrainStrategy)
|
|
|
|
must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
|
2018-03-03 01:15:38 +00:00
|
|
|
}
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
func testNodeDrainWatcherSetup(
|
|
|
|
t *testing.T, store *state.StateStore, tracker *NodeDrainer) (
|
|
|
|
*structs.Node, structs.NamespacedID) {
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
t.Helper()
|
|
|
|
index, _ := store.LatestIndex()
|
|
|
|
|
|
|
|
// Create a job that will have an alloc on our node
|
|
|
|
job := mock.Job()
|
|
|
|
jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
|
|
|
|
index++
|
|
|
|
must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, job))
|
|
|
|
|
|
|
|
// Create draining nodes, each with its own alloc for the job running on that node
|
|
|
|
node := mock.Node()
|
|
|
|
node.DrainStrategy = &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{Deadline: time.Hour},
|
2018-03-03 01:15:38 +00:00
|
|
|
ForceDeadline: time.Now().Add(time.Hour),
|
|
|
|
}
|
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.Job = job
|
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
alloc.NodeID = node.ID
|
|
|
|
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
|
|
|
|
index++
|
|
|
|
must.NoError(t, store.UpsertAllocs(
|
|
|
|
structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc}))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
index++
|
|
|
|
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node))
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
// Node should be tracked and notifications should fire to the job watcher
|
|
|
|
// and deadline notifier
|
|
|
|
assertTrackerSettled(t, tracker, []string{node.ID})
|
|
|
|
must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID)
|
|
|
|
must.MapContainsKeys(t,
|
|
|
|
tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, []string{node.ID})
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
return node, jobID
|
|
|
|
}
|
|
|
|
|
|
|
|
func assertTrackerSettled(t *testing.T, tracker *NodeDrainer, nodeIDs []string) {
|
|
|
|
t.Helper()
|
2018-03-03 01:15:38 +00:00
|
|
|
|
2023-03-23 18:07:09 +00:00
|
|
|
must.Wait(t, wait.InitialSuccess(
|
|
|
|
wait.Timeout(100*time.Millisecond),
|
|
|
|
wait.Gap(time.Millisecond),
|
|
|
|
wait.TestFunc(func() (bool, error) {
|
|
|
|
if len(tracker.TrackedNodes()) != len(nodeIDs) {
|
|
|
|
return false, fmt.Errorf(
|
|
|
|
"expected nodes %v to become marked draining, got %d",
|
|
|
|
nodeIDs, len(tracker.TrackedNodes()))
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}),
|
|
|
|
))
|
|
|
|
|
|
|
|
must.Wait(t, wait.ContinualSuccess(
|
|
|
|
wait.Timeout(100*time.Millisecond),
|
|
|
|
wait.Gap(10*time.Millisecond),
|
|
|
|
wait.TestFunc(func() (bool, error) {
|
|
|
|
if len(tracker.TrackedNodes()) != len(nodeIDs) {
|
|
|
|
return false, fmt.Errorf(
|
|
|
|
"expected nodes %v to stay marked draining, got %d",
|
|
|
|
nodeIDs, len(tracker.TrackedNodes()))
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}),
|
|
|
|
))
|
|
|
|
|
|
|
|
for _, nodeID := range nodeIDs {
|
|
|
|
must.MapContainsKey(t, tracker.TrackedNodes(), nodeID)
|
|
|
|
}
|
2018-03-03 01:15:38 +00:00
|
|
|
}
|