// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package drainer

import (
	"context"
	"testing"
	"time"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/time/rate"
)

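// testNodes creates one draining node and one running node in the state
// store and returns them.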
func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
	n1 := mock.Node()
	n1.Name = "draining"
	n1.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: time.Minute,
		},
		ForceDeadline: time.Now().Add(time.Minute),
	}
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1))

	// Create a non-draining node
	n2 := mock.Node()
	n2.Name = "running"
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2))
	return n1, n2
}

func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
	t.Helper()

	limiter := rate.NewLimiter(100.0, 100)
	logger := testlog.HCLogger(t)
	ctx, cancel := context.WithCancel(context.Background())
	w := NewDrainingJobWatcher(ctx, limiter, state, logger)
	return w, cancel
}

// TestDrainingJobWatcher_Interface is a compile-time assertion that we
// implement the intended interface.
func TestDrainingJobWatcher_Interface(t *testing.T) {
	ci.Parallel(t)

	w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
	cancel()
	var _ DrainingJobWatcher = w
}

// assertJobWatcherOps asserts a certain number of allocs are drained and/or
// migrated by the job watcher.
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
	*DrainRequest, []*structs.Allocation) {
	t.Helper()
	var (
		drains                           *DrainRequest
		migrations                       []*structs.Allocation
		drainsChecked, migrationsChecked bool
	)
	for {
		select {
		case drains = <-jw.Drain():
			ids := make([]string, len(drains.Allocs))
			for i, a := range drains.Allocs {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("draining %d allocs: %v", len(ids), ids)
			require.False(t, drainsChecked, "drains already received")
			drainsChecked = true
			require.Lenf(t, drains.Allocs, drained,
				"expected %d drains but found %d", drained, len(drains.Allocs))
		case migrations = <-jw.Migrated():
			ids := make([]string, len(migrations))
			for i, a := range migrations {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("migrating %d allocs: %v", len(ids), ids)
			require.False(t, migrationsChecked, "migrations already received")
			migrationsChecked = true
			require.Lenf(t, migrations, migrated,
				"expected %d migrations but found %d", migrated, len(migrations))
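		// After a short quiet period, fail if expected ops never arrived
		// and return whatever was received.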
		case <-time.After(10 * time.Millisecond):
			if !drainsChecked && drained > 0 {
				t.Fatalf("expected %d drains but none happened", drained)
			}
			if !migrationsChecked && migrated > 0 {
				t.Fatalf("expected %d migrations but none happened", migrated)
			}
			return drains, migrations
		}
	}
}

// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
// allocation changes from multiple jobs.
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	state := state.TestStateStore(t)
	jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
	defer cancelWatcher()
	drainingNode, runningNode := testNodes(t, state)

	var index uint64 = 101
	count := 8

	newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
		a := mock.Alloc()
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = node.ID
		return a
	}

	// 2 jobs with count 8, max parallel 3
	jnss := make([]structs.NamespacedID, 2)
	jobs := make([]*structs.Job, 2)
	for i := 0; i < 2; i++ {
		job := mock.Job()
		jobs[i] = job
		jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
		job.TaskGroups[0].Migrate.MaxParallel = 3
		job.TaskGroups[0].Count = count
		require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
		index++

		var allocs []*structs.Allocation
		for i := 0; i < count; i++ {
			a := newAlloc(drainingNode, job)
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: pointer.Of(true),
			}
			allocs = append(allocs, a)
		}

		require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
		index++
	}

	// Only register jobs with the watcher after creating all data models,
	// as once the watcher starts we need to track the index carefully for
	// updating the batch future
	jobWatcher.RegisterJobs(jnss)

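	// Each job has count 8 and max_parallel 3, so drains should arrive in
	// batches of 3 per job: 6, then 6, then a final 4 across both jobs.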
	// Expect a first batch of MaxParallel allocs from each job
	drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrating the drained allocs by starting new ones and stopping
	// the old ones
	drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
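	// Ack the drain request so the watcher knows the index at which the
	// desired-transition updates were committed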
	drains.Resp.Respond(index, nil)
	index++

	// Just setting ShouldMigrate should not cause any further drains
	assertJobWatcherOps(t, jobWatcher, 0, 0)

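	// updates will hold both the stopped allocs and their replacements;
	// replacements keeps copies so they can be marked healthy later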
	// Proceed our fake migration along by creating new allocs and stopping
	// old ones
	replacements := make([]*structs.Allocation, len(drainedAllocs))
	updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		// Stop drained allocs
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		// Create a replacement
		replacement := mock.Alloc()
		replacement.JobID = a.Job.ID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID
		// start in pending state with no health status

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	// Stopping the drained allocs causes migrations but no new drains
	// because the replacements have not started
	assertJobWatcherOps(t, jobWatcher, 0, 6)

	// Finally kick off further drain activity by "starting" replacements
	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// 6 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 6)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

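	// Each job still has 2 allocs (8 - 3 - 3) on the draining node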
	// Final 4 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 4)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))

	// No jobs should be left!
	require.Empty(jobWatcher.drainingJobs())
}

// DrainingJobWatcher tests:
// TODO Test that the watcher cancels its query when a new job is registered

// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup
//
// Two nodes will be initialized: one draining and one running.
type handleTaskGroupTestCase struct {
	// Name of test
	Name string

	// Batch uses a batch job and alloc
	Batch bool

	// Expectations
	ExpectedDrained  int
	ExpectedMigrated int
	ExpectedDone     bool

	// Count overrides the default count of 10 if set
	Count int

	// MaxParallel overrides the default max_parallel of 1 if set
	MaxParallel int

	// AddAlloc will be called 10 times to create test allocs
	//
	// Allocs default to be healthy on the draining node
	AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
}

func TestHandleTaskGroup_Table(t *testing.T) {
	ci.Parallel(t)

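	// Each case creates 10 allocs (healthy on the draining node by
	// default), runs handleTaskGroup once, and checks how many allocs
	// were marked for drain or counted as migrated.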
	cases := []handleTaskGroupTestCase{
		{
			// All allocs on draining node
			Name:             "AllDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
		},
		{
			// All allocs on non-draining node
			Name:             "AllNonDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.NodeID = runningID
			},
		},
		{
			// Some allocs on non-draining node but not healthy
			Name:             "SomeNonDrainingUnhealthy",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i%2 == 0 {
					a.NodeID = runningID
					a.DeploymentStatus = nil
				}
			},
		},
		{
			// One draining, other allocs on non-draining node and healthy
			Name:             "OneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i != 0 {
					a.NodeID = runningID
				}
			},
		},
		{
			// One already draining, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredTransition.Migrate = pointer.Of(true)
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrained",
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrainedBatched",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigrating",
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigratingBatch",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs may be drained at once
			Name:             "AllAtOnce",
			ExpectedDrained:  10,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      10,
		},
		{
			// Drain 2
			Name:             "Drain2",
			ExpectedDrained:  2,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      2,
		},
		{
			// One on new node, one drained, and one draining
			Name:             "OneOnNewNode",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// One alloc on running node
					a.NodeID = runningID
				case 1:
					// One alloc already migrated
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 8 on new node, one drained, and one draining
			Name:             "EightOnNewNode",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4, 5, 6, 7:
					a.NodeID = runningID
				case 8:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 on new node, two drained, and three draining
			Name:             "FiveOnNewNode",
			ExpectedDrained:  3,
			ExpectedMigrated: 2,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4:
					a.NodeID = runningID
				case 8, 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// Not all on new node have health set
			Name:             "PendingHealth",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      3,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// Deployment status UNset for 1 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 1, 2, 3, 4:
					// Deployment status set for 4 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
			Name:             "PendingHealthHigherMax",
			ExpectedDrained:  2,
			ExpectedMigrated: 1,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1:
					// Deployment status UNset for 2 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 2, 3, 4:
					// Deployment status set for 3 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
	}

	for _, testCase := range cases {
		t.Run(testCase.Name, func(t *testing.T) {
			testHandleTaskGroup(t, testCase)
		})
	}
}

func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
	ci.Parallel(t)

	require := require.New(t)
	assert := assert.New(t)

	// Create nodes
	state := state.TestStateStore(t)
	drainingNode, runningNode := testNodes(t, state)

	job := mock.Job()
	if tc.Batch {
		job = mock.BatchJob()
	}
	job.TaskGroups[0].Count = 10
	if tc.Count > 0 {
		job.TaskGroups[0].Count = tc.Count
	}
	if tc.MaxParallel > 0 {
		job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
	}
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 102, nil, job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		if tc.Batch {
			a = mock.BatchAlloc()
		}
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name

		// Default to being healthy on the draining node
		a.NodeID = drainingNode.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
		if tc.AddAlloc != nil {
			tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
		}
		allocs = append(allocs, a)
	}

	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs))
	snap, err := state.Snapshot()
	require.Nil(err)

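	// Call handleTaskGroup with index 102 (the job upsert) so the alloc
	// changes written at index 103 are evaluated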
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, tc.Batch, job.TaskGroups[0], allocs, 102, res))
	assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
		tc.ExpectedDrained, len(res.drain))
	assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
		tc.ExpectedMigrated, len(res.migrated))
	assert.Equal(tc.ExpectedDone, res.done)
}

func TestHandleTaskGroup_Migrations(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
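	// With index 101 (before the allocs were written at 102) all 10
	// terminal allocs are reported as migrated; with index 103 (after),
	// nothing new is reported.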
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}

// This test asserts that handleTaskGroup works when an allocation is on a
// garbage collected node
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}

	// Make the first one be on a GC'd node
	allocs[0].NodeID = uuid.Generate()
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
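	// The alloc on the GC'd node has no node in state, so only 9 of the
	// 10 terminal allocs are reported as migrated.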
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}