2018-03-08 23:08:23 +00:00
|
|
|
package drainer
|
2018-03-06 18:12:17 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2022-03-15 12:42:43 +00:00
|
|
|
"github.com/hashicorp/nomad/ci"
|
2018-03-06 18:12:17 +00:00
|
|
|
"github.com/hashicorp/nomad/helper"
|
|
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
2018-04-25 20:21:36 +00:00
|
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
2018-03-06 18:12:17 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
|
|
"github.com/hashicorp/nomad/nomad/state"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2018-03-10 00:25:46 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2018-03-06 18:12:17 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"golang.org/x/time/rate"
|
|
|
|
)
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
|
|
|
|
n1 := mock.Node()
|
|
|
|
n1.Name = "draining"
|
|
|
|
n1.DrainStrategy = &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{
|
|
|
|
Deadline: time.Minute,
|
|
|
|
},
|
|
|
|
ForceDeadline: time.Now().Add(time.Minute),
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1))
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// Create a non-draining node
|
|
|
|
n2 := mock.Node()
|
|
|
|
n2.Name = "running"
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2))
|
2018-03-10 00:25:46 +00:00
|
|
|
return n1, n2
|
|
|
|
}
|
|
|
|
|
|
|
|
func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
|
2018-03-06 18:12:17 +00:00
|
|
|
t.Helper()
|
|
|
|
|
|
|
|
limiter := rate.NewLimiter(100.0, 100)
|
2018-09-15 23:23:13 +00:00
|
|
|
logger := testlog.HCLogger(t)
|
2018-03-10 00:25:46 +00:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
w := NewDrainingJobWatcher(ctx, limiter, state, logger)
|
|
|
|
return w, cancel
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// TestDrainingJobWatcher_Interface is a compile-time assertion that we
|
|
|
|
// implement the intended interface.
|
2018-03-06 18:12:17 +00:00
|
|
|
func TestDrainingJobWatcher_Interface(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
|
|
|
|
cancel()
|
|
|
|
var _ DrainingJobWatcher = w
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
2018-03-19 17:36:31 +00:00
|
|
|
// asertJobWatcherOps asserts a certain number of allocs are drained and/or
|
|
|
|
// migrated by the job watcher.
|
|
|
|
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
|
|
|
|
*DrainRequest, []*structs.Allocation) {
|
|
|
|
t.Helper()
|
|
|
|
var (
|
|
|
|
drains *DrainRequest
|
|
|
|
migrations []*structs.Allocation
|
|
|
|
drainsChecked, migrationsChecked bool
|
|
|
|
)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case drains = <-jw.Drain():
|
|
|
|
ids := make([]string, len(drains.Allocs))
|
|
|
|
for i, a := range drains.Allocs {
|
|
|
|
ids[i] = a.JobID[:6] + ":" + a.ID[:6]
|
|
|
|
}
|
|
|
|
t.Logf("draining %d allocs: %v", len(ids), ids)
|
|
|
|
require.False(t, drainsChecked, "drains already received")
|
|
|
|
drainsChecked = true
|
|
|
|
require.Lenf(t, drains.Allocs, drained,
|
|
|
|
"expected %d drains but found %d", drained, len(drains.Allocs))
|
|
|
|
case migrations = <-jw.Migrated():
|
|
|
|
ids := make([]string, len(migrations))
|
|
|
|
for i, a := range migrations {
|
|
|
|
ids[i] = a.JobID[:6] + ":" + a.ID[:6]
|
|
|
|
}
|
|
|
|
t.Logf("migrating %d allocs: %v", len(ids), ids)
|
|
|
|
require.False(t, migrationsChecked, "migrations already received")
|
|
|
|
migrationsChecked = true
|
|
|
|
require.Lenf(t, migrations, migrated,
|
|
|
|
"expected %d migrations but found %d", migrated, len(migrations))
|
|
|
|
case <-time.After(10 * time.Millisecond):
|
|
|
|
if !drainsChecked && drained > 0 {
|
|
|
|
t.Fatalf("expected %d drains but none happened", drained)
|
|
|
|
}
|
|
|
|
if !migrationsChecked && migrated > 0 {
|
|
|
|
t.Fatalf("expected %d migrations but none happened", migrated)
|
|
|
|
}
|
|
|
|
return drains, migrations
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
|
|
|
|
// allocation changes from multiple jobs.
|
|
|
|
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2018-03-06 18:12:17 +00:00
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
state := state.TestStateStore(t)
|
2018-03-10 00:25:46 +00:00
|
|
|
jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
|
|
|
|
defer cancelWatcher()
|
|
|
|
drainingNode, runningNode := testNodes(t, state)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
var index uint64 = 101
|
|
|
|
count := 8
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
|
2018-03-06 18:12:17 +00:00
|
|
|
a := mock.Alloc()
|
2018-03-10 00:25:46 +00:00
|
|
|
a.JobID = job.ID
|
2018-03-06 18:12:17 +00:00
|
|
|
a.Job = job
|
|
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
2018-03-10 00:25:46 +00:00
|
|
|
a.NodeID = node.ID
|
|
|
|
return a
|
|
|
|
}
|
|
|
|
|
|
|
|
// 2 jobs with count 10, max parallel 3
|
2018-03-19 17:12:12 +00:00
|
|
|
jnss := make([]structs.NamespacedID, 2)
|
2018-03-10 00:25:46 +00:00
|
|
|
jobs := make([]*structs.Job, 2)
|
|
|
|
for i := 0; i < 2; i++ {
|
|
|
|
job := mock.Job()
|
|
|
|
jobs[i] = job
|
2018-03-19 17:12:12 +00:00
|
|
|
jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
|
2018-03-10 00:25:46 +00:00
|
|
|
job.TaskGroups[0].Migrate.MaxParallel = 3
|
|
|
|
job.TaskGroups[0].Count = count
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, index, job))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
|
|
|
var allocs []*structs.Allocation
|
|
|
|
for i := 0; i < count; i++ {
|
|
|
|
a := newAlloc(drainingNode, job)
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(true),
|
|
|
|
}
|
|
|
|
allocs = append(allocs, a)
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
2018-03-10 00:25:46 +00:00
|
|
|
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// Only register jobs with watcher after creating all data models as
|
|
|
|
// once the watcher starts we need to track the index carefully for
|
|
|
|
// updating the batch future
|
|
|
|
jobWatcher.RegisterJobs(jnss)
|
|
|
|
|
|
|
|
// Expect a first batch of MaxParallel allocs from each job
|
2018-03-19 17:36:31 +00:00
|
|
|
drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// Fake migrating the drained allocs by starting new ones and stopping
|
|
|
|
// the old ones
|
|
|
|
drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
|
|
|
|
for i, a := range drains.Allocs {
|
|
|
|
a.DesiredTransition.Migrate = helper.BoolToPtr(true)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// create a copy so we can reuse this slice
|
|
|
|
drainedAllocs[i] = a.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
2018-03-10 00:25:46 +00:00
|
|
|
drains.Resp.Respond(index, nil)
|
|
|
|
index++
|
|
|
|
|
|
|
|
// Just setting ShouldMigrate should not cause any further drains
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// Proceed our fake migration along by creating new allocs and stopping
|
|
|
|
// old ones
|
|
|
|
replacements := make([]*structs.Allocation, len(drainedAllocs))
|
|
|
|
updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
|
|
for i, a := range drainedAllocs {
|
|
|
|
// Stop drained allocs
|
|
|
|
a.DesiredTransition.Migrate = nil
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
|
|
|
|
// Create a replacement
|
|
|
|
replacement := mock.Alloc()
|
|
|
|
replacement.JobID = a.Job.ID
|
|
|
|
replacement.Job = a.Job
|
|
|
|
replacement.TaskGroup = a.TaskGroup
|
|
|
|
replacement.NodeID = runningNode.ID
|
|
|
|
// start in pending state with no health status
|
|
|
|
|
|
|
|
updates = append(updates, a, replacement)
|
|
|
|
replacements[i] = replacement.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
|
|
|
// The drained allocs stopping cause migrations but no new drains
|
|
|
|
// because the replacements have not started
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 6)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// Finally kickoff further drain activity by "starting" replacements
|
|
|
|
for _, a := range replacements {
|
|
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(true),
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
2018-03-10 00:25:46 +00:00
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
require.NotEmpty(jobWatcher.drainingJobs())
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// 6 new drains
|
2018-03-19 17:36:31 +00:00
|
|
|
drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// Fake migrations once more to finish the drain
|
|
|
|
drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
|
|
|
|
for i, a := range drains.Allocs {
|
|
|
|
a.DesiredTransition.Migrate = helper.BoolToPtr(true)
|
|
|
|
|
|
|
|
// create a copy so we can reuse this slice
|
|
|
|
drainedAllocs[i] = a.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
2018-03-10 00:25:46 +00:00
|
|
|
drains.Resp.Respond(index, nil)
|
|
|
|
index++
|
|
|
|
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
replacements = make([]*structs.Allocation, len(drainedAllocs))
|
|
|
|
updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
|
|
for i, a := range drainedAllocs {
|
|
|
|
a.DesiredTransition.Migrate = nil
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
|
|
|
|
replacement := newAlloc(runningNode, a.Job)
|
|
|
|
updates = append(updates, a, replacement)
|
|
|
|
replacements[i] = replacement.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 6)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
for _, a := range replacements {
|
|
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(true),
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
2018-03-10 00:25:46 +00:00
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
|
|
|
require.NotEmpty(jobWatcher.drainingJobs())
|
|
|
|
|
|
|
|
// Final 4 new drains
|
2018-03-19 17:36:31 +00:00
|
|
|
drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// Fake migrations once more to finish the drain
|
|
|
|
drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
|
|
|
|
for i, a := range drains.Allocs {
|
|
|
|
a.DesiredTransition.Migrate = helper.BoolToPtr(true)
|
|
|
|
|
|
|
|
// create a copy so we can reuse this slice
|
|
|
|
drainedAllocs[i] = a.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
2018-03-10 00:25:46 +00:00
|
|
|
drains.Resp.Respond(index, nil)
|
|
|
|
index++
|
|
|
|
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
replacements = make([]*structs.Allocation, len(drainedAllocs))
|
|
|
|
updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
|
|
for i, a := range drainedAllocs {
|
|
|
|
a.DesiredTransition.Migrate = nil
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
replacement := newAlloc(runningNode, a.Job)
|
|
|
|
updates = append(updates, a, replacement)
|
|
|
|
replacements[i] = replacement.Copy()
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
2018-03-10 00:25:46 +00:00
|
|
|
index++
|
|
|
|
|
2018-03-19 17:36:31 +00:00
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 4)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
for _, a := range replacements {
|
|
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(true),
|
|
|
|
}
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
2018-03-10 00:25:46 +00:00
|
|
|
|
|
|
|
// No jobs should be left!
|
|
|
|
require.Empty(jobWatcher.drainingJobs())
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// DrainingJobWatcher tests:
|
|
|
|
// TODO Test that the watcher cancels its query when a new job is registered
|
|
|
|
|
|
|
|
// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup
|
|
|
|
//
|
|
|
|
// Two nodes will be initialized: one draining and one running.
|
|
|
|
type handleTaskGroupTestCase struct {
|
|
|
|
// Name of test
|
|
|
|
Name string
|
|
|
|
|
2018-03-29 23:38:47 +00:00
|
|
|
// Batch uses a batch job and alloc
|
|
|
|
Batch bool
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// Expectations
|
|
|
|
ExpectedDrained int
|
|
|
|
ExpectedMigrated int
|
|
|
|
ExpectedDone bool
|
|
|
|
|
|
|
|
// Count overrides the default count of 10 if set
|
|
|
|
Count int
|
|
|
|
|
|
|
|
// MaxParallel overrides the default max_parallel of 1 if set
|
|
|
|
MaxParallel int
|
|
|
|
|
|
|
|
// AddAlloc will be called 10 times to create test allocs
|
|
|
|
//
|
|
|
|
// Allocs default to be healthy on the draining node
|
|
|
|
AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestHandeTaskGroup_Table(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
cases := []handleTaskGroupTestCase{
|
2018-03-06 18:12:17 +00:00
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// All allocs on draining node
|
|
|
|
Name: "AllDraining",
|
|
|
|
ExpectedDrained: 1,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// All allocs on non-draining node
|
|
|
|
Name: "AllNonDraining",
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: true,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
a.NodeID = runningID
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// Some allocs on non-draining node but not healthy
|
|
|
|
Name: "SomeNonDrainingUnhealthy",
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
if i%2 == 0 {
|
|
|
|
a.NodeID = runningID
|
|
|
|
a.DeploymentStatus = nil
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// One draining, other allocs on non-draining node and healthy
|
|
|
|
Name: "OneDraining",
|
|
|
|
ExpectedDrained: 1,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
if i != 0 {
|
|
|
|
a.NodeID = runningID
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// One already draining, other allocs on non-draining node and healthy
|
|
|
|
Name: "OneAlreadyDraining",
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
if i == 0 {
|
|
|
|
a.DesiredTransition.Migrate = helper.BoolToPtr(true)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
a.NodeID = runningID
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// One already drained, other allocs on non-draining node and healthy
|
|
|
|
Name: "OneAlreadyDrained",
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
ExpectedDone: true,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
if i == 0 {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
return
|
|
|
|
}
|
|
|
|
a.NodeID = runningID
|
|
|
|
},
|
|
|
|
},
|
2018-03-29 23:38:47 +00:00
|
|
|
{
|
|
|
|
// One already drained, other allocs on non-draining node and healthy
|
|
|
|
Name: "OneAlreadyDrainedBatched",
|
|
|
|
Batch: true,
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
ExpectedDone: true,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
if i == 0 {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
return
|
|
|
|
}
|
|
|
|
a.NodeID = runningID
|
|
|
|
},
|
|
|
|
},
|
2018-03-10 00:25:46 +00:00
|
|
|
{
|
|
|
|
// All allocs are terminl, nothing to be drained
|
|
|
|
Name: "AllMigrating",
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 10,
|
|
|
|
ExpectedDone: true,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
2018-03-29 23:38:47 +00:00
|
|
|
{
|
|
|
|
// All allocs are terminl, nothing to be drained
|
|
|
|
Name: "AllMigratingBatch",
|
|
|
|
Batch: true,
|
|
|
|
ExpectedDrained: 0,
|
|
|
|
ExpectedMigrated: 10,
|
|
|
|
ExpectedDone: true,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
},
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// All allocs may be drained at once
|
|
|
|
Name: "AllAtOnce",
|
|
|
|
ExpectedDrained: 10,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
|
|
|
MaxParallel: 10,
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// Drain 2
|
|
|
|
Name: "Drain2",
|
|
|
|
ExpectedDrained: 2,
|
|
|
|
ExpectedMigrated: 0,
|
|
|
|
ExpectedDone: false,
|
|
|
|
MaxParallel: 2,
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// One on new node, one drained, and one draining
|
|
|
|
ExpectedDrained: 1,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
MaxParallel: 2,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
switch i {
|
|
|
|
case 0:
|
|
|
|
// One alloc on running node
|
|
|
|
a.NodeID = runningID
|
|
|
|
case 1:
|
|
|
|
// One alloc already migrated
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// 8 on new node, one drained, and one draining
|
|
|
|
ExpectedDrained: 1,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
MaxParallel: 2,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
switch i {
|
|
|
|
case 0, 1, 2, 3, 4, 5, 6, 7:
|
|
|
|
a.NodeID = runningID
|
|
|
|
case 8:
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// 5 on new node, two drained, and three draining
|
|
|
|
ExpectedDrained: 3,
|
|
|
|
ExpectedMigrated: 2,
|
|
|
|
MaxParallel: 5,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
switch i {
|
|
|
|
case 0, 1, 2, 3, 4:
|
|
|
|
a.NodeID = runningID
|
|
|
|
case 8, 9:
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
{
|
2018-03-10 00:25:46 +00:00
|
|
|
// Not all on new node have health set
|
|
|
|
Name: "PendingHealth",
|
|
|
|
ExpectedDrained: 1,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
MaxParallel: 3,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
switch i {
|
|
|
|
case 0:
|
|
|
|
// Deployment status UNset for 1 on new node
|
|
|
|
a.NodeID = runningID
|
|
|
|
a.DeploymentStatus = nil
|
|
|
|
case 1, 2, 3, 4:
|
|
|
|
// Deployment status set for 4 on new node
|
|
|
|
a.NodeID = runningID
|
|
|
|
case 9:
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
}
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
|
|
|
|
Name: "PendingHealthHigherMax",
|
|
|
|
ExpectedDrained: 2,
|
|
|
|
ExpectedMigrated: 1,
|
|
|
|
MaxParallel: 5,
|
|
|
|
AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
|
|
switch i {
|
|
|
|
case 0, 1:
|
|
|
|
// Deployment status UNset for 2 on new node
|
|
|
|
a.NodeID = runningID
|
|
|
|
a.DeploymentStatus = nil
|
|
|
|
case 2, 3, 4:
|
|
|
|
// Deployment status set for 3 on new node
|
|
|
|
a.NodeID = runningID
|
|
|
|
case 9:
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
}
|
|
|
|
},
|
2018-03-06 18:12:17 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
for _, testCase := range cases {
|
|
|
|
t.Run(testCase.Name, func(t *testing.T) {
|
|
|
|
testHandleTaskGroup(t, testCase)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
require := require.New(t)
|
|
|
|
assert := assert.New(t)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// Create nodes
|
|
|
|
state := state.TestStateStore(t)
|
|
|
|
drainingNode, runningNode := testNodes(t, state)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
job := mock.Job()
|
2018-03-29 23:38:47 +00:00
|
|
|
if tc.Batch {
|
|
|
|
job = mock.BatchJob()
|
|
|
|
}
|
2018-03-10 00:25:46 +00:00
|
|
|
job.TaskGroups[0].Count = 10
|
|
|
|
if tc.Count > 0 {
|
|
|
|
job.TaskGroups[0].Count = tc.Count
|
|
|
|
}
|
|
|
|
if tc.MaxParallel > 0 {
|
|
|
|
job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 102, job))
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
var allocs []*structs.Allocation
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
a := mock.Alloc()
|
2018-03-29 23:38:47 +00:00
|
|
|
if tc.Batch {
|
|
|
|
a = mock.BatchAlloc()
|
|
|
|
}
|
2018-03-10 00:25:46 +00:00
|
|
|
a.JobID = job.ID
|
|
|
|
a.Job = job
|
|
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
// Default to being healthy on the draining node
|
|
|
|
a.NodeID = drainingNode.ID
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(true),
|
|
|
|
}
|
|
|
|
if tc.AddAlloc != nil {
|
|
|
|
tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
|
|
|
|
}
|
|
|
|
allocs = append(allocs, a)
|
|
|
|
}
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs))
|
2018-03-10 00:25:46 +00:00
|
|
|
snap, err := state.Snapshot()
|
|
|
|
require.Nil(err)
|
2018-03-06 18:12:17 +00:00
|
|
|
|
2018-03-10 00:25:46 +00:00
|
|
|
res := newJobResult()
|
2018-03-29 23:38:47 +00:00
|
|
|
require.Nil(handleTaskGroup(snap, tc.Batch, job.TaskGroups[0], allocs, 102, res))
|
2018-03-10 00:25:46 +00:00
|
|
|
assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
|
|
|
|
tc.ExpectedDrained, len(res.drain))
|
|
|
|
assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
|
|
|
|
tc.ExpectedMigrated, len(res.migrated))
|
|
|
|
assert.Equal(tc.ExpectedDone, res.done)
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestHandleTaskGroup_Migrations(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2018-03-06 18:12:17 +00:00
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
// Create a draining node
|
|
|
|
state := state.TestStateStore(t)
|
|
|
|
n := mock.Node()
|
|
|
|
n.DrainStrategy = &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{
|
|
|
|
Deadline: 5 * time.Minute,
|
|
|
|
},
|
|
|
|
ForceDeadline: time.Now().Add(1 * time.Minute),
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))
|
2018-03-06 18:12:17 +00:00
|
|
|
|
|
|
|
job := mock.Job()
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))
|
2018-03-06 18:12:17 +00:00
|
|
|
|
|
|
|
// Create 10 done allocs
|
|
|
|
var allocs []*structs.Allocation
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
a := mock.Alloc()
|
|
|
|
a.Job = job
|
|
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
a.NodeID = n.ID
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(false),
|
|
|
|
}
|
|
|
|
|
|
|
|
if i%2 == 0 {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
} else {
|
|
|
|
a.ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
}
|
|
|
|
allocs = append(allocs, a)
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))
|
2018-03-06 18:12:17 +00:00
|
|
|
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
require.Nil(err)
|
|
|
|
|
2018-03-29 23:38:47 +00:00
|
|
|
// Handle before and after indexes as both service and batch
|
2018-03-20 23:27:24 +00:00
|
|
|
res := newJobResult()
|
2018-03-29 21:30:38 +00:00
|
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
|
2018-03-06 18:12:17 +00:00
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Len(res.migrated, 10)
|
|
|
|
require.True(res.done)
|
|
|
|
|
2018-03-29 23:38:47 +00:00
|
|
|
res = newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Len(res.migrated, 10)
|
|
|
|
require.True(res.done)
|
|
|
|
|
2018-03-20 23:27:24 +00:00
|
|
|
res = newJobResult()
|
2018-03-29 21:30:38 +00:00
|
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
|
2018-03-06 18:12:17 +00:00
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Empty(res.migrated)
|
|
|
|
require.True(res.done)
|
2018-03-29 23:38:47 +00:00
|
|
|
|
|
|
|
res = newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Empty(res.migrated)
|
|
|
|
require.True(res.done)
|
2018-03-06 18:12:17 +00:00
|
|
|
}
|
2018-04-25 20:21:36 +00:00
|
|
|
|
|
|
|
// This test asserts that handle task group works when an allocation is on a
|
|
|
|
// garbage collected node
|
|
|
|
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
|
2022-03-15 12:42:43 +00:00
|
|
|
ci.Parallel(t)
|
2018-04-25 20:21:36 +00:00
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
// Create a draining node
|
|
|
|
state := state.TestStateStore(t)
|
|
|
|
n := mock.Node()
|
|
|
|
n.DrainStrategy = &structs.DrainStrategy{
|
|
|
|
DrainSpec: structs.DrainSpec{
|
|
|
|
Deadline: 5 * time.Minute,
|
|
|
|
},
|
|
|
|
ForceDeadline: time.Now().Add(1 * time.Minute),
|
|
|
|
}
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))
|
2018-04-25 20:21:36 +00:00
|
|
|
|
|
|
|
job := mock.Job()
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))
|
2018-04-25 20:21:36 +00:00
|
|
|
|
|
|
|
// Create 10 done allocs
|
|
|
|
var allocs []*structs.Allocation
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
a := mock.Alloc()
|
|
|
|
a.Job = job
|
|
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
a.NodeID = n.ID
|
|
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
|
|
Healthy: helper.BoolToPtr(false),
|
|
|
|
}
|
|
|
|
|
|
|
|
if i%2 == 0 {
|
|
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
} else {
|
|
|
|
a.ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
}
|
|
|
|
allocs = append(allocs, a)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the first one be on a GC'd node
|
|
|
|
allocs[0].NodeID = uuid.Generate()
|
2020-10-19 13:30:15 +00:00
|
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))
|
2018-04-25 20:21:36 +00:00
|
|
|
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Handle before and after indexes as both service and batch
|
|
|
|
res := newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Len(res.migrated, 9)
|
|
|
|
require.True(res.done)
|
|
|
|
|
|
|
|
res = newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Len(res.migrated, 9)
|
|
|
|
require.True(res.done)
|
|
|
|
|
|
|
|
res = newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Empty(res.migrated)
|
|
|
|
require.True(res.done)
|
|
|
|
|
|
|
|
res = newJobResult()
|
|
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
|
|
|
|
require.Empty(res.drain)
|
|
|
|
require.Empty(res.migrated)
|
|
|
|
require.True(res.done)
|
|
|
|
}
|