778 lines
23 KiB
Go
778 lines
23 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package drainer
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/shoenig/test"
|
|
"github.com/shoenig/test/must"
|
|
"github.com/stretchr/testify/require"
|
|
"golang.org/x/time/rate"
|
|
|
|
"github.com/hashicorp/nomad/ci"
|
|
"github.com/hashicorp/nomad/helper/pointer"
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/state"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
|
|
n1 := mock.Node()
|
|
n1.Name = "draining"
|
|
n1.DrainStrategy = &structs.DrainStrategy{
|
|
DrainSpec: structs.DrainSpec{
|
|
Deadline: time.Minute,
|
|
},
|
|
ForceDeadline: time.Now().Add(time.Minute),
|
|
}
|
|
require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1))
|
|
|
|
// Create a non-draining node
|
|
n2 := mock.Node()
|
|
n2.Name = "running"
|
|
require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2))
|
|
return n1, n2
|
|
}
|
|
|
|
func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
|
|
t.Helper()
|
|
|
|
limiter := rate.NewLimiter(100.0, 100)
|
|
logger := testlog.HCLogger(t)
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
w := NewDrainingJobWatcher(ctx, limiter, state, logger)
|
|
return w, cancel
|
|
}
|
|
|
|
// TestDrainingJobWatcher_Interface is a compile-time assertion that we
|
|
// implement the intended interface.
|
|
func TestDrainingJobWatcher_Interface(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
|
|
cancel()
|
|
var _ DrainingJobWatcher = w
|
|
}
|
|
|
|
// asertJobWatcherOps asserts a certain number of allocs are drained and/or
|
|
// migrated by the job watcher.
|
|
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
|
|
*DrainRequest, []*structs.Allocation) {
|
|
t.Helper()
|
|
var (
|
|
drains *DrainRequest
|
|
migrations []*structs.Allocation
|
|
drainsChecked, migrationsChecked bool
|
|
)
|
|
for {
|
|
select {
|
|
case drains = <-jw.Drain():
|
|
ids := make([]string, len(drains.Allocs))
|
|
for i, a := range drains.Allocs {
|
|
ids[i] = a.JobID[:6] + ":" + a.ID[:6]
|
|
}
|
|
t.Logf("draining %d allocs: %v", len(ids), ids)
|
|
require.False(t, drainsChecked, "drains already received")
|
|
drainsChecked = true
|
|
require.Lenf(t, drains.Allocs, drained,
|
|
"expected %d drains but found %d", drained, len(drains.Allocs))
|
|
case migrations = <-jw.Migrated():
|
|
ids := make([]string, len(migrations))
|
|
for i, a := range migrations {
|
|
ids[i] = a.JobID[:6] + ":" + a.ID[:6]
|
|
}
|
|
t.Logf("migrating %d allocs: %v", len(ids), ids)
|
|
require.False(t, migrationsChecked, "migrations already received")
|
|
migrationsChecked = true
|
|
require.Lenf(t, migrations, migrated,
|
|
"expected %d migrations but found %d", migrated, len(migrations))
|
|
case <-time.After(10 * time.Millisecond):
|
|
if !drainsChecked && drained > 0 {
|
|
t.Fatalf("expected %d drains but none happened", drained)
|
|
}
|
|
if !migrationsChecked && migrated > 0 {
|
|
t.Fatalf("expected %d migrations but none happened", migrated)
|
|
}
|
|
return drains, migrations
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
|
|
// allocation changes from multiple jobs.
|
|
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
store := state.TestStateStore(t)
|
|
jobWatcher, cancelWatcher := testDrainingJobWatcher(t, store)
|
|
defer cancelWatcher()
|
|
drainingNode, runningNode := testNodes(t, store)
|
|
|
|
var index uint64 = 101
|
|
count := 8
|
|
|
|
newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
|
|
a := mock.Alloc()
|
|
a.JobID = job.ID
|
|
a.Job = job
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
a.NodeID = node.ID
|
|
return a
|
|
}
|
|
|
|
// 2 jobs with count 10, max parallel 3
|
|
jnss := make([]structs.NamespacedID, 2)
|
|
jobs := make([]*structs.Job, 2)
|
|
for i := 0; i < 2; i++ {
|
|
job := mock.Job()
|
|
jobs[i] = job
|
|
jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
|
|
job.TaskGroups[0].Migrate.MaxParallel = 3
|
|
job.TaskGroups[0].Count = count
|
|
must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
|
|
index++
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < count; i++ {
|
|
a := newAlloc(drainingNode, job)
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
}
|
|
allocs = append(allocs, a)
|
|
}
|
|
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
|
|
index++
|
|
|
|
}
|
|
|
|
// Only register jobs with watcher after creating all data models as
|
|
// once the watcher starts we need to track the index carefully for
|
|
// updating the batch future
|
|
jobWatcher.RegisterJobs(jnss)
|
|
|
|
// Expect a first batch of MaxParallel allocs from each job
|
|
drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)
|
|
|
|
// Fake migrating the drained allocs by starting new ones and stopping
|
|
// the old ones
|
|
drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
|
|
for i, a := range drains.Allocs {
|
|
a.DesiredTransition.Migrate = pointer.Of(true)
|
|
|
|
// create a copy so we can reuse this slice
|
|
drainedAllocs[i] = a.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
|
drains.Resp.Respond(index, nil)
|
|
index++
|
|
|
|
// Just setting ShouldMigrate should not cause any further drains
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
|
|
|
// Proceed our fake migration along by creating new allocs and stopping
|
|
// old ones
|
|
replacements := make([]*structs.Allocation, len(drainedAllocs))
|
|
updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
for i, a := range drainedAllocs {
|
|
// Stop drained allocs
|
|
a.DesiredTransition.Migrate = nil
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
// Create a replacement
|
|
replacement := mock.Alloc()
|
|
replacement.JobID = a.Job.ID
|
|
replacement.Job = a.Job
|
|
replacement.TaskGroup = a.TaskGroup
|
|
replacement.NodeID = runningNode.ID
|
|
// start in pending state with no health status
|
|
|
|
updates = append(updates, a, replacement)
|
|
replacements[i] = replacement.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
|
index++
|
|
|
|
// The drained allocs stopping cause migrations but no new drains
|
|
// because the replacements have not started
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
|
|
|
// Client sends stop on these allocs
|
|
completeAllocs := make([]*structs.Allocation, len(drainedAllocs))
|
|
for i, a := range drainedAllocs {
|
|
a = a.Copy()
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
completeAllocs[i] = a
|
|
}
|
|
must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, index, completeAllocs))
|
|
index++
|
|
|
|
// The drained allocs stopping cause migrations but no new drains
|
|
// because the replacements have not started
|
|
assertJobWatcherOps(t, jobWatcher, 0, 6)
|
|
|
|
// Finally kickoff further drain activity by "starting" replacements
|
|
for _, a := range replacements {
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
}
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
|
index++
|
|
|
|
must.MapNotEmpty(t, jobWatcher.drainingJobs())
|
|
|
|
// 6 new drains
|
|
drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)
|
|
|
|
// Fake migrations once more to finish the drain
|
|
drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
|
|
for i, a := range drains.Allocs {
|
|
a.DesiredTransition.Migrate = pointer.Of(true)
|
|
|
|
// create a copy so we can reuse this slice
|
|
drainedAllocs[i] = a.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
|
drains.Resp.Respond(index, nil)
|
|
index++
|
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
|
|
|
replacements = make([]*structs.Allocation, len(drainedAllocs))
|
|
updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
for i, a := range drainedAllocs {
|
|
a.DesiredTransition.Migrate = nil
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
replacement := newAlloc(runningNode, a.Job)
|
|
updates = append(updates, a, replacement)
|
|
replacements[i] = replacement.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
|
index++
|
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 6)
|
|
|
|
for _, a := range replacements {
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
}
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
|
index++
|
|
|
|
must.MapNotEmpty(t, jobWatcher.drainingJobs())
|
|
|
|
// Final 4 new drains
|
|
drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)
|
|
|
|
// Fake migrations once more to finish the drain
|
|
drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
|
|
for i, a := range drains.Allocs {
|
|
a.DesiredTransition.Migrate = pointer.Of(true)
|
|
|
|
// create a copy so we can reuse this slice
|
|
drainedAllocs[i] = a.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
|
|
drains.Resp.Respond(index, nil)
|
|
index++
|
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 0)
|
|
|
|
replacements = make([]*structs.Allocation, len(drainedAllocs))
|
|
updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
|
|
for i, a := range drainedAllocs {
|
|
a.DesiredTransition.Migrate = nil
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
replacement := newAlloc(runningNode, a.Job)
|
|
updates = append(updates, a, replacement)
|
|
replacements[i] = replacement.Copy()
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
|
|
index++
|
|
|
|
assertJobWatcherOps(t, jobWatcher, 0, 4)
|
|
|
|
for _, a := range replacements {
|
|
a.ClientStatus = structs.AllocClientStatusRunning
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
}
|
|
}
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
|
|
|
|
// No jobs should be left!
|
|
must.MapEmpty(t, jobWatcher.drainingJobs())
|
|
}
|
|
|
|
// TestDrainingJobWatcher_HandleTaskGroup tests that the watcher handles
|
|
// allocation updates as expected.
|
|
func TestDrainingJobWatcher_HandleTaskGroup(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
testCases := []struct {
|
|
name string
|
|
batch bool // use a batch job
|
|
allocCount int // number of allocs in test (defaults to 10)
|
|
maxParallel int // max_parallel (defaults to 1)
|
|
|
|
// addAllocFn will be called allocCount times to create test allocs,
|
|
// and the allocs default to be healthy on the draining node
|
|
addAllocFn func(idx int, a *structs.Allocation, drainingID, runningID string)
|
|
|
|
expectDrained int
|
|
expectMigrated int
|
|
expectDone bool
|
|
}{
|
|
{
|
|
// all allocs on draining node, should respect max_parallel=1
|
|
name: "drain-respects-max-parallel-1",
|
|
expectDrained: 1,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
},
|
|
{
|
|
// allocs on a non-draining node, should not be drained
|
|
name: "allocs-on-non-draining-node-should-not-drain",
|
|
expectDrained: 0,
|
|
expectMigrated: 0,
|
|
expectDone: true,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
a.NodeID = runningID
|
|
},
|
|
},
|
|
{
|
|
// even unhealthy allocs on a non-draining node should not be drained
|
|
name: "unhealthy-allocs-on-non-draining-node-should-not-drain",
|
|
expectDrained: 0,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
if i%2 == 0 {
|
|
a.NodeID = runningID
|
|
a.DeploymentStatus = nil
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// only the alloc on draining node should be drained
|
|
name: "healthy-alloc-draining-node-should-drain",
|
|
expectDrained: 1,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
if i != 0 {
|
|
a.NodeID = runningID
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// alloc that's still draining doesn't produce more result updates
|
|
name: "still-draining-alloc-no-new-updates",
|
|
expectDrained: 0,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
if i == 0 {
|
|
a.DesiredTransition.Migrate = pointer.Of(true)
|
|
return
|
|
}
|
|
a.NodeID = runningID
|
|
},
|
|
},
|
|
{
|
|
// alloc that's finished draining gets marked as migrated
|
|
name: "client-terminal-alloc-drain-should-be-finished",
|
|
expectDrained: 0,
|
|
expectMigrated: 1,
|
|
expectDone: true,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
if i == 0 {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
return
|
|
}
|
|
a.NodeID = runningID
|
|
},
|
|
},
|
|
{
|
|
// batch alloc that's finished draining gets marked as migrated
|
|
name: "client-terminal-batch-alloc-drain-should-be-finished",
|
|
batch: true,
|
|
expectDrained: 0,
|
|
expectMigrated: 1,
|
|
expectDone: true,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
if i == 0 {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
return
|
|
}
|
|
a.NodeID = runningID
|
|
},
|
|
},
|
|
{
|
|
// all allocs are client-terminal, so nothing left to drain
|
|
name: "all-client-terminal-drain-should-be-finished",
|
|
expectDrained: 0,
|
|
expectMigrated: 10,
|
|
expectDone: true,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
},
|
|
},
|
|
{
|
|
// all allocs are terminal, but only half are client-terminal
|
|
name: "half-client-terminal-drain-should-not-be-finished",
|
|
expectDrained: 0,
|
|
expectMigrated: 5,
|
|
expectDone: false,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
if i%2 == 0 {
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// All allocs are terminal, nothing to be drained
|
|
name: "all-terminal-batch",
|
|
batch: true,
|
|
expectDrained: 0,
|
|
expectMigrated: 10,
|
|
expectDone: true,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
},
|
|
},
|
|
{
|
|
// with max_parallel=10, all allocs can be drained at once
|
|
name: "drain-respects-max-parallel-all-at-once",
|
|
expectDrained: 10,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
maxParallel: 10,
|
|
},
|
|
{
|
|
// with max_parallel=2, up to 2 allocs can be drained at a time
|
|
name: "drain-respects-max-parallel-2",
|
|
expectDrained: 2,
|
|
expectMigrated: 0,
|
|
expectDone: false,
|
|
maxParallel: 2,
|
|
},
|
|
{
|
|
// with max_parallel=2, up to 2 allocs can be drained at a time but
|
|
// we haven't yet informed the drainer that 1 has completed
|
|
// migrating
|
|
name: "notify-migrated-1-on-new-1-drained-1-draining",
|
|
expectDrained: 1,
|
|
expectMigrated: 1,
|
|
maxParallel: 2,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
switch i {
|
|
case 0:
|
|
// One alloc on running node
|
|
a.NodeID = runningID
|
|
case 1:
|
|
// One alloc already migrated
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// with max_parallel=2, up to 2 allocs can be drained at a time but
|
|
// we haven't yet informed the drainer that 1 has completed
|
|
// migrating
|
|
name: "notify-migrated-8-on-new-1-drained-1-draining",
|
|
expectDrained: 1,
|
|
expectMigrated: 1,
|
|
maxParallel: 2,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
switch i {
|
|
case 0, 1, 2, 3, 4, 5, 6, 7:
|
|
a.NodeID = runningID
|
|
case 8:
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// 5 on new node, two drained, and three draining
|
|
// with max_parallel=5, up to 5 allocs can be drained at a time but
|
|
// we haven't yet informed the drainer that 2 have completed
|
|
// migrating
|
|
name: "notify-migrated-5-on-new-2-drained-3-draining",
|
|
expectDrained: 3,
|
|
expectMigrated: 2,
|
|
maxParallel: 5,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
switch i {
|
|
case 0, 1, 2, 3, 4:
|
|
a.NodeID = runningID
|
|
case 8, 9:
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// half the allocs have been moved to the new node but 1 doesn't
|
|
// have health set yet, so we should have MaxParallel - 1 in flight
|
|
name: "pending-health-blocks",
|
|
expectDrained: 1,
|
|
expectMigrated: 1,
|
|
maxParallel: 3,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
switch i {
|
|
case 0:
|
|
// Deployment status UNset for 1 on new node
|
|
a.NodeID = runningID
|
|
a.DeploymentStatus = nil
|
|
case 1, 2, 3, 4:
|
|
// Deployment status set for 4 on new node
|
|
a.NodeID = runningID
|
|
case 9:
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
{
|
|
// half the allocs have been moved to the new node but 2 don't have
|
|
// health set yet, so we should have MaxParallel - 2 in flight
|
|
name: "pending-health-blocks-higher-max",
|
|
expectDrained: 2,
|
|
expectMigrated: 1,
|
|
maxParallel: 5,
|
|
addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) {
|
|
switch i {
|
|
case 0, 1:
|
|
// Deployment status UNset for 2 on new node
|
|
a.NodeID = runningID
|
|
a.DeploymentStatus = nil
|
|
case 2, 3, 4:
|
|
// Deployment status set for 3 on new node
|
|
a.NodeID = runningID
|
|
case 9:
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
}
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
// Create nodes
|
|
store := state.TestStateStore(t)
|
|
drainingNode, runningNode := testNodes(t, store)
|
|
|
|
job := mock.Job()
|
|
if tc.batch {
|
|
job = mock.BatchJob()
|
|
}
|
|
job.TaskGroups[0].Count = 10
|
|
if tc.allocCount > 0 {
|
|
job.TaskGroups[0].Count = tc.allocCount
|
|
}
|
|
if tc.maxParallel > 0 {
|
|
job.TaskGroups[0].Migrate.MaxParallel = tc.maxParallel
|
|
}
|
|
must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, 102, nil, job))
|
|
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
a := mock.Alloc()
|
|
if tc.batch {
|
|
a = mock.BatchAlloc()
|
|
}
|
|
a.JobID = job.ID
|
|
a.Job = job
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
// Default to being healthy on the draining node
|
|
a.NodeID = drainingNode.ID
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(true),
|
|
}
|
|
if tc.addAllocFn != nil {
|
|
tc.addAllocFn(i, a, drainingNode.ID, runningNode.ID)
|
|
}
|
|
allocs = append(allocs, a)
|
|
}
|
|
|
|
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs))
|
|
snap, err := store.Snapshot()
|
|
must.NoError(t, err)
|
|
|
|
res := newJobResult()
|
|
must.NoError(t, handleTaskGroup(snap, tc.batch, job.TaskGroups[0], allocs, 102, res))
|
|
test.Len(t, tc.expectDrained, res.drain, test.Sprint("expected drained allocs"))
|
|
test.Len(t, tc.expectMigrated, res.migrated, test.Sprint("expected migrated allocs"))
|
|
test.Eq(t, tc.expectDone, res.done)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestHandleTaskGroup_Migrations(t *testing.T) {
|
|
ci.Parallel(t)
|
|
require := require.New(t)
|
|
|
|
// Create a draining node
|
|
state := state.TestStateStore(t)
|
|
n := mock.Node()
|
|
n.DrainStrategy = &structs.DrainStrategy{
|
|
DrainSpec: structs.DrainSpec{
|
|
Deadline: 5 * time.Minute,
|
|
},
|
|
ForceDeadline: time.Now().Add(1 * time.Minute),
|
|
}
|
|
require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))
|
|
|
|
job := mock.Job()
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job))
|
|
|
|
// Create 10 done allocs
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
a := mock.Alloc()
|
|
a.Job = job
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
a.NodeID = n.ID
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(false),
|
|
}
|
|
|
|
if i%2 == 0 {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
} else {
|
|
a.ClientStatus = structs.AllocClientStatusFailed
|
|
}
|
|
allocs = append(allocs, a)
|
|
}
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))
|
|
|
|
snap, err := state.Snapshot()
|
|
require.Nil(err)
|
|
|
|
// Handle before and after indexes as both service and batch
|
|
res := newJobResult()
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
|
|
require.Empty(res.drain)
|
|
require.Len(res.migrated, 10)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
|
|
require.Empty(res.drain)
|
|
require.Len(res.migrated, 10)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
|
|
require.Empty(res.drain)
|
|
require.Empty(res.migrated)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
|
|
require.Empty(res.drain)
|
|
require.Empty(res.migrated)
|
|
require.True(res.done)
|
|
}
|
|
|
|
// This test asserts that handle task group works when an allocation is on a
|
|
// garbage collected node
|
|
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
|
|
ci.Parallel(t)
|
|
require := require.New(t)
|
|
|
|
// Create a draining node
|
|
state := state.TestStateStore(t)
|
|
n := mock.Node()
|
|
n.DrainStrategy = &structs.DrainStrategy{
|
|
DrainSpec: structs.DrainSpec{
|
|
Deadline: 5 * time.Minute,
|
|
},
|
|
ForceDeadline: time.Now().Add(1 * time.Minute),
|
|
}
|
|
require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))
|
|
|
|
job := mock.Job()
|
|
require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job))
|
|
|
|
// Create 10 done allocs
|
|
var allocs []*structs.Allocation
|
|
for i := 0; i < 10; i++ {
|
|
a := mock.Alloc()
|
|
a.Job = job
|
|
a.TaskGroup = job.TaskGroups[0].Name
|
|
a.NodeID = n.ID
|
|
a.DeploymentStatus = &structs.AllocDeploymentStatus{
|
|
Healthy: pointer.Of(false),
|
|
}
|
|
|
|
if i%2 == 0 {
|
|
a.DesiredStatus = structs.AllocDesiredStatusStop
|
|
a.ClientStatus = structs.AllocClientStatusComplete
|
|
} else {
|
|
a.ClientStatus = structs.AllocClientStatusFailed
|
|
}
|
|
allocs = append(allocs, a)
|
|
}
|
|
|
|
// Make the first one be on a GC'd node
|
|
allocs[0].NodeID = uuid.Generate()
|
|
require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))
|
|
|
|
snap, err := state.Snapshot()
|
|
require.Nil(err)
|
|
|
|
// Handle before and after indexes as both service and batch
|
|
res := newJobResult()
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
|
|
require.Empty(res.drain)
|
|
require.Len(res.migrated, 9)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
|
|
require.Empty(res.drain)
|
|
require.Len(res.migrated, 9)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
|
|
require.Empty(res.drain)
|
|
require.Empty(res.migrated)
|
|
require.True(res.done)
|
|
|
|
res = newJobResult()
|
|
require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
|
|
require.Empty(res.drain)
|
|
require.Empty(res.migrated)
|
|
require.True(res.done)
|
|
}
|