2015-08-15 23:07:50 +00:00
|
|
|
package nomad
|
|
|
|
|
|
|
|
import (
|
2017-10-23 22:04:00 +00:00
|
|
|
"fmt"
|
2015-08-15 23:07:50 +00:00
|
|
|
"testing"
|
2015-08-16 00:42:51 +00:00
|
|
|
"time"
|
2015-08-15 23:07:50 +00:00
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
memdb "github.com/hashicorp/go-memdb"
|
2020-04-02 20:04:56 +00:00
|
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
2018-01-22 22:31:38 +00:00
|
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
2015-08-15 23:07:50 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
2020-04-02 20:04:56 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/state"
|
2015-08-15 23:07:50 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
"github.com/hashicorp/nomad/testutil"
|
2017-06-29 19:32:37 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2018-01-22 22:31:38 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2015-08-15 23:07:50 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestCoreScheduler_EvalGC(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2015-08-15 23:07:50 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
2018-01-22 22:31:38 +00:00
|
|
|
require := require.New(t)
|
2015-08-15 23:07:50 +00:00
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2015-08-15 23:07:50 +00:00
|
|
|
// Insert "dead" eval
|
|
|
|
state := s1.fsm.State()
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusFailed
|
2016-07-25 21:11:32 +00:00
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
|
2015-08-15 23:07:50 +00:00
|
|
|
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
2018-01-22 22:31:38 +00:00
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Insert mock job with rescheduling disabled
|
|
|
|
job := mock.Job()
|
|
|
|
job.ID = eval.JobID
|
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
2015-08-15 23:07:50 +00:00
|
|
|
}
|
2018-01-22 22:31:38 +00:00
|
|
|
err = state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
2015-08-15 23:07:50 +00:00
|
|
|
|
|
|
|
// Insert "dead" alloc
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.EvalID = eval.ID
|
2016-07-13 19:20:46 +00:00
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2016-07-25 21:11:32 +00:00
|
|
|
alloc.JobID = eval.JobID
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2016-08-04 18:24:17 +00:00
|
|
|
|
|
|
|
// Insert "lost" alloc
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
alloc2.JobID = eval.JobID
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
2016-08-04 18:24:17 +00:00
|
|
|
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
|
2015-08-15 23:07:50 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-08-16 00:42:51 +00:00
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
2015-08-15 23:07:50 +00:00
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
2015-08-15 23:07:50 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
2015-08-15 23:07:50 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
2015-08-15 23:07:50 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA != nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
2016-08-04 18:24:17 +00:00
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
2016-08-04 18:24:17 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 != nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
2016-03-25 23:46:48 +00:00
|
|
|
}
|
|
|
|
|
2018-01-22 22:31:38 +00:00
|
|
|
// Tests GC behavior on allocations being rescheduled
|
2018-03-11 18:40:32 +00:00
|
|
|
func TestCoreScheduler_EvalGC_ReschedulingAllocs(t *testing.T) {
|
2018-01-22 22:31:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2018-01-22 22:31:38 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert "dead" eval
|
|
|
|
state := s1.fsm.State()
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusFailed
|
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
|
|
|
|
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
|
|
|
require.Nil(err)
|
|
|
|
|
2018-01-30 20:45:59 +00:00
|
|
|
// Insert "pending" eval for same job
|
|
|
|
eval2 := mock.Eval()
|
|
|
|
eval2.JobID = eval.JobID
|
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval2.JobID))
|
|
|
|
err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
|
|
|
|
require.Nil(err)
|
|
|
|
|
2018-01-22 22:31:38 +00:00
|
|
|
// Insert mock job with default reschedule policy of 2 in 10 minutes
|
|
|
|
job := mock.Job()
|
|
|
|
job.ID = eval.JobID
|
|
|
|
|
|
|
|
err = state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Insert failed alloc with an old reschedule attempt, can be GCed
|
|
|
|
alloc := mock.Alloc()
|
2018-11-01 05:02:26 +00:00
|
|
|
alloc.Job = job
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc.ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
alloc.JobID = eval.JobID
|
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2018-04-11 18:58:02 +00:00
|
|
|
alloc.NextAllocation = uuid.Generate()
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.RescheduleTracker = &structs.RescheduleTracker{
|
|
|
|
Events: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
|
|
|
|
PrevNodeID: uuid.Generate(),
|
|
|
|
PrevAllocID: uuid.Generate(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
alloc2 := mock.Alloc()
|
2018-11-01 05:02:26 +00:00
|
|
|
alloc2.Job = job
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
2018-04-11 18:58:02 +00:00
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusFailed
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.JobID = eval.JobID
|
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
alloc2.RescheduleTracker = &structs.RescheduleTracker{
|
|
|
|
Events: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
PrevNodeID: uuid.Generate(),
|
|
|
|
PrevAllocID: uuid.Generate(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
2018-01-30 20:45:59 +00:00
|
|
|
// Attempt the GC, job has all terminal allocs and one pending eval
|
2018-01-22 22:31:38 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Eval should still exist
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
|
|
|
require.Nil(err)
|
2018-04-10 22:12:06 +00:00
|
|
|
require.NotNil(out)
|
2018-01-22 22:31:38 +00:00
|
|
|
require.Equal(eval.ID, out.ID)
|
|
|
|
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
require.Nil(err)
|
|
|
|
require.Nil(outA)
|
|
|
|
|
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
|
|
|
require.Nil(err)
|
|
|
|
require.Equal(alloc2.ID, outA2.ID)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-01-30 15:12:14 +00:00
|
|
|
// Tests GC behavior on stopped job with reschedulable allocs
|
|
|
|
func TestCoreScheduler_EvalGC_StoppedJob_Reschedulable(t *testing.T) {
|
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2018-01-30 15:12:14 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert "dead" eval
|
|
|
|
state := s1.fsm.State()
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusFailed
|
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
|
|
|
|
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Insert mock stopped job with default reschedule policy of 2 in 10 minutes
|
|
|
|
job := mock.Job()
|
|
|
|
job.ID = eval.JobID
|
|
|
|
job.Stop = true
|
|
|
|
|
|
|
|
err = state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Insert failed alloc with a recent reschedule attempt
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
alloc.JobID = eval.JobID
|
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
alloc.RescheduleTracker = &structs.RescheduleTracker{
|
|
|
|
Events: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
PrevNodeID: uuid.Generate(),
|
|
|
|
PrevAllocID: uuid.Generate(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc})
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
require.Nil(err)
|
|
|
|
|
|
|
|
// Eval should not exist
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
|
|
|
require.Nil(err)
|
|
|
|
require.Nil(out)
|
|
|
|
|
|
|
|
// Alloc should not exist
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
require.Nil(err)
|
|
|
|
require.Nil(outA)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-03-11 23:48:57 +00:00
|
|
|
// An EvalGC should never reap a batch job that has not been stopped
|
2016-06-27 22:47:49 +00:00
|
|
|
func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-06-11 01:32:37 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Insert a "dead" job
|
2016-06-11 01:32:37 +00:00
|
|
|
state := s1.fsm.State()
|
2016-06-27 22:47:49 +00:00
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-06-28 17:02:06 +00:00
|
|
|
// Insert "complete" eval
|
2016-06-11 01:32:37 +00:00
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
2016-06-27 22:47:49 +00:00
|
|
|
eval.Type = structs.JobTypeBatch
|
|
|
|
eval.JobID = job.ID
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
|
2016-06-11 01:32:37 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-06-28 17:02:06 +00:00
|
|
|
// Insert "failed" alloc
|
2016-06-11 01:32:37 +00:00
|
|
|
alloc := mock.Alloc()
|
2018-11-01 05:02:26 +00:00
|
|
|
alloc.Job = job
|
2016-06-27 22:47:49 +00:00
|
|
|
alloc.JobID = job.ID
|
2016-06-11 01:32:37 +00:00
|
|
|
alloc.EvalID = eval.ID
|
2016-07-13 19:20:46 +00:00
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2016-08-04 18:24:17 +00:00
|
|
|
|
|
|
|
// Insert "lost" alloc
|
|
|
|
alloc2 := mock.Alloc()
|
2018-11-01 05:02:26 +00:00
|
|
|
alloc2.Job = job
|
2016-08-04 18:24:17 +00:00
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
|
2016-06-11 01:32:37 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
2016-06-11 01:32:37 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Nothing should be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
2016-06-11 01:32:37 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
2016-06-11 01:32:37 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA == nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
2016-08-04 18:24:17 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
|
|
|
|
2017-09-07 23:56:15 +00:00
|
|
|
outB, err := state.JobByID(ws, job.Namespace, job.ID)
|
2016-06-11 01:32:37 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2016-06-27 22:47:49 +00:00
|
|
|
if outB == nil {
|
2016-06-11 01:32:37 +00:00
|
|
|
t.Fatalf("bad: %v", outB)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-01 05:02:26 +00:00
|
|
|
// An EvalGC should reap allocations from jobs with an older modify index
|
|
|
|
func TestCoreScheduler_EvalGC_Batch_OldVersion(t *testing.T) {
|
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2018-11-01 05:02:26 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert a "dead" job
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert "complete" eval
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
eval.Type = structs.JobTypeBatch
|
|
|
|
eval.JobID = job.ID
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert "failed" alloc
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.Job = job
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
|
|
|
|
// Insert "lost" alloc
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.Job = job
|
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
|
|
|
|
// Insert alloc with older job modifyindex
|
|
|
|
alloc3 := mock.Alloc()
|
|
|
|
job2 := job.Copy()
|
|
|
|
|
|
|
|
alloc3.Job = job2
|
|
|
|
alloc3.JobID = job2.ID
|
|
|
|
alloc3.EvalID = eval.ID
|
2018-11-09 17:44:21 +00:00
|
|
|
job2.CreateIndex = 500
|
2018-11-01 05:02:26 +00:00
|
|
|
alloc3.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc3.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2, alloc3})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Alloc1 and 2 should be there, and alloc3 should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA == nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA3, err := state.AllocByID(ws, alloc3.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA3 != nil {
|
|
|
|
t.Fatalf("expected alloc to be nil:%v", outA2)
|
|
|
|
}
|
|
|
|
|
|
|
|
outB, err := state.JobByID(ws, job.Namespace, job.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outB == nil {
|
|
|
|
t.Fatalf("bad: %v", outB)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-11 23:48:57 +00:00
|
|
|
// An EvalGC should reap a batch job that has been stopped
|
|
|
|
func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-03-11 23:48:57 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2018-01-22 22:31:38 +00:00
|
|
|
require := require.New(t)
|
2017-03-11 23:48:57 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Create a "dead" job
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
2018-01-22 22:31:38 +00:00
|
|
|
job.Stop = true
|
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
|
|
|
}
|
|
|
|
err := state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
2017-03-11 23:48:57 +00:00
|
|
|
|
|
|
|
// Insert "complete" eval
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
eval.Type = structs.JobTypeBatch
|
|
|
|
eval.JobID = job.ID
|
2018-01-22 22:31:38 +00:00
|
|
|
err = state.UpsertEvals(1002, []*structs.Evaluation{eval})
|
|
|
|
require.Nil(err)
|
2017-03-11 23:48:57 +00:00
|
|
|
|
|
|
|
// Insert "failed" alloc
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.EvalID = eval.ID
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2017-03-11 23:48:57 +00:00
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
|
|
|
|
// Insert "lost" alloc
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusLost
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
2017-03-11 23:48:57 +00:00
|
|
|
|
2018-01-22 22:31:38 +00:00
|
|
|
err = state.UpsertAllocs(1003, []*structs.Allocation{alloc, alloc2})
|
2017-03-11 23:48:57 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Everything should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA != nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 != nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-03-25 23:46:48 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
2018-01-22 22:31:38 +00:00
|
|
|
require := require.New(t)
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-03-25 23:46:48 +00:00
|
|
|
// Insert "dead" eval
|
|
|
|
state := s1.fsm.State()
|
|
|
|
eval := mock.Eval()
|
2016-06-27 22:47:49 +00:00
|
|
|
eval.Status = structs.EvalStatusComplete
|
2016-07-25 21:11:32 +00:00
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
|
2016-03-25 23:46:48 +00:00
|
|
|
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2018-01-30 20:45:59 +00:00
|
|
|
// Create mock job with id same as eval
|
2018-01-22 22:31:38 +00:00
|
|
|
job := mock.Job()
|
|
|
|
job.ID = eval.JobID
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Insert "dead" alloc
|
2016-03-25 23:46:48 +00:00
|
|
|
alloc := mock.Alloc()
|
2018-01-30 20:45:59 +00:00
|
|
|
alloc.JobID = job.ID
|
2016-03-25 23:46:48 +00:00
|
|
|
alloc.EvalID = eval.ID
|
2016-07-13 19:20:46 +00:00
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2016-07-25 21:11:32 +00:00
|
|
|
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
|
2016-08-04 18:24:17 +00:00
|
|
|
|
|
|
|
// Insert "lost" alloc
|
|
|
|
alloc2 := mock.Alloc()
|
2018-01-30 20:45:59 +00:00
|
|
|
alloc2.JobID = job.ID
|
2016-08-04 18:24:17 +00:00
|
|
|
alloc2.EvalID = eval.ID
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
2016-08-04 18:24:17 +00:00
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusLost
|
|
|
|
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2016-06-22 18:40:27 +00:00
|
|
|
|
|
|
|
// Insert "running" alloc
|
2016-08-04 18:24:17 +00:00
|
|
|
alloc3 := mock.Alloc()
|
|
|
|
alloc3.EvalID = eval.ID
|
2018-01-30 20:45:59 +00:00
|
|
|
alloc3.JobID = job.ID
|
2016-08-04 18:24:17 +00:00
|
|
|
state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID))
|
|
|
|
err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3})
|
2016-03-25 23:46:48 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2018-01-30 20:45:59 +00:00
|
|
|
// Insert mock job with rescheduling disabled
|
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
|
|
|
}
|
|
|
|
err = state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
|
|
|
|
2016-03-25 23:46:48 +00:00
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
2016-03-25 23:46:48 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Should not be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
2016-03-25 23:46:48 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err := state.AllocByID(ws, alloc3.ID)
|
2016-03-25 23:46:48 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA == nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
2016-04-08 18:42:02 +00:00
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Should be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
outB, err := state.AllocByID(ws, alloc.ID)
|
2016-04-08 18:42:02 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2016-06-27 22:47:49 +00:00
|
|
|
if outB != nil {
|
|
|
|
t.Fatalf("bad: %v", outB)
|
2016-04-08 18:42:02 +00:00
|
|
|
}
|
2016-08-04 18:24:17 +00:00
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outC, err := state.AllocByID(ws, alloc2.ID)
|
2016-08-04 18:24:17 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outC != nil {
|
|
|
|
t.Fatalf("bad: %v", outC)
|
|
|
|
}
|
2016-04-08 18:42:02 +00:00
|
|
|
}
|
|
|
|
|
2016-02-20 23:50:41 +00:00
|
|
|
func TestCoreScheduler_EvalGC_Force(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2017-10-23 22:04:00 +00:00
|
|
|
for _, withAcl := range []bool{false, true} {
|
|
|
|
t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
|
2018-01-22 22:31:38 +00:00
|
|
|
require := require.New(t)
|
2017-10-23 22:04:00 +00:00
|
|
|
var server *Server
|
2019-12-04 00:15:11 +00:00
|
|
|
var cleanup func()
|
2017-10-23 22:04:00 +00:00
|
|
|
if withAcl {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, _, cleanup = TestACLServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
} else {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, cleanup = TestServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
}
|
2019-12-04 00:15:11 +00:00
|
|
|
defer cleanup()
|
2017-10-23 22:04:00 +00:00
|
|
|
testutil.WaitForLeader(t, server.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert "dead" eval
|
|
|
|
state := server.fsm.State()
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.Status = structs.EvalStatusFailed
|
|
|
|
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
|
|
|
|
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2018-01-22 22:31:38 +00:00
|
|
|
// Insert mock job with rescheduling disabled
|
|
|
|
job := mock.Job()
|
|
|
|
job.ID = eval.JobID
|
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
|
|
|
}
|
|
|
|
err = state.UpsertJob(1001, job)
|
|
|
|
require.Nil(err)
|
|
|
|
|
2017-10-23 22:04:00 +00:00
|
|
|
// Insert "dead" alloc
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2017-10-23 22:04:00 +00:00
|
|
|
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(server, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := server.coreJobEval(structs.CoreJobForceGC, 1002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.EvalByID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA != nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
})
|
2016-02-20 23:50:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-07 18:01:29 +00:00
|
|
|
func TestCoreScheduler_NodeGC(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2017-10-23 22:04:00 +00:00
|
|
|
for _, withAcl := range []bool{false, true} {
|
|
|
|
t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
|
|
|
|
var server *Server
|
2019-12-04 00:15:11 +00:00
|
|
|
var cleanup func()
|
2017-10-23 22:04:00 +00:00
|
|
|
if withAcl {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, _, cleanup = TestACLServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
} else {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, cleanup = TestServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
}
|
2019-12-04 00:15:11 +00:00
|
|
|
defer cleanup()
|
2017-10-23 22:04:00 +00:00
|
|
|
testutil.WaitForLeader(t, server.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert "dead" node
|
|
|
|
state := server.fsm.State()
|
|
|
|
node := mock.Node()
|
|
|
|
node.Status = structs.NodeStatusDown
|
|
|
|
err := state.UpsertNode(1000, node)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := server.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*server.config.NodeGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(server, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := server.coreJobEval(structs.CoreJobNodeGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.NodeByID(ws, node.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
})
|
2015-09-07 18:01:29 +00:00
|
|
|
}
|
|
|
|
}
|
2015-12-15 03:20:57 +00:00
|
|
|
|
2016-06-03 23:24:41 +00:00
|
|
|
func TestCoreScheduler_NodeGC_TerminalAllocs(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-06-03 23:24:41 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-06-03 23:24:41 +00:00
|
|
|
// Insert "dead" node
|
|
|
|
state := s1.fsm.State()
|
|
|
|
node := mock.Node()
|
|
|
|
node.Status = structs.NodeStatusDown
|
|
|
|
err := state.UpsertNode(1000, node)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a terminal alloc on that node
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2016-07-25 21:11:32 +00:00
|
|
|
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
|
|
|
|
if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
|
2016-06-03 23:24:41 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
|
2016-06-03 23:24:41 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.NodeByID(ws, node.ID)
|
2016-06-03 23:24:41 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestCoreScheduler_NodeGC_RunningAllocs(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-06-03 23:24:41 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-06-03 23:24:41 +00:00
|
|
|
// Insert "dead" node
|
|
|
|
state := s1.fsm.State()
|
|
|
|
node := mock.Node()
|
|
|
|
node.Status = structs.NodeStatusDown
|
|
|
|
err := state.UpsertNode(1000, node)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a running alloc on that node
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.NodeID = node.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc.ClientStatus = structs.AllocClientStatusRunning
|
2016-07-25 21:11:32 +00:00
|
|
|
state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
|
|
|
|
if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
|
2016-06-03 23:24:41 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
|
2016-06-03 23:24:41 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still be here
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.NodeByID(ws, node.ID)
|
2016-06-03 23:24:41 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-20 23:50:41 +00:00
|
|
|
func TestCoreScheduler_NodeGC_Force(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-02-20 23:50:41 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-02-20 23:50:41 +00:00
|
|
|
// Insert "dead" node
|
|
|
|
state := s1.fsm.State()
|
|
|
|
node := mock.Node()
|
|
|
|
node.Status = structs.NodeStatusDown
|
|
|
|
err := state.UpsertNode(1000, node)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
2016-06-22 16:04:22 +00:00
|
|
|
gc := s1.coreJobEval(structs.CoreJobForceGC, 1000)
|
2016-02-20 23:50:41 +00:00
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should be gone
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.NodeByID(ws, node.ID)
|
2016-02-20 23:50:41 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-25 21:56:23 +00:00
|
|
|
func TestCoreScheduler_JobGC_OutstandingEvals(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-07-25 21:56:23 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-07-25 21:56:23 +00:00
|
|
|
// Insert job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert two evals, one terminal and one not
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.JobID = job.ID
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
|
|
|
|
eval2 := mock.Eval()
|
|
|
|
eval2.JobID = job.ID
|
|
|
|
eval2.Status = structs.EvalStatusPending
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still exist
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE, err := state.EvalByID(ws, eval.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE == nil {
|
|
|
|
t.Fatalf("bad: %v", outE)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE2, err := state.EvalByID(ws, eval2.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outE2)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the second eval to be terminal
|
|
|
|
eval2.Status = structs.EvalStatusComplete
|
|
|
|
err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err = state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core = NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should not still exist
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err = state.JobByID(ws, job.Namespace, job.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE, err = state.EvalByID(ws, eval.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE != nil {
|
|
|
|
t.Fatalf("bad: %v", outE)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE2, err = state.EvalByID(ws, eval2.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE2 != nil {
|
|
|
|
t.Fatalf("bad: %v", outE2)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-07-25 21:56:23 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-07-25 21:56:23 +00:00
|
|
|
// Insert job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
2018-01-22 22:31:38 +00:00
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
|
|
|
}
|
2016-07-25 21:56:23 +00:00
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert an eval
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.JobID = job.ID
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert two allocs, one terminal and one not
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2016-07-25 21:56:23 +00:00
|
|
|
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.EvalID = eval.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusRunning
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
2016-07-25 21:56:23 +00:00
|
|
|
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still exist
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA == nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the second alloc to be terminal
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
err = state.UpsertAllocs(1003, []*structs.Allocation{alloc2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err = state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core = NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should not still exist
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err = state.JobByID(ws, job.Namespace, job.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err = state.AllocByID(ws, alloc.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA != nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA2, err = state.AllocByID(ws, alloc2.ID)
|
2016-07-25 21:56:23 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 != nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
2015-12-15 03:20:57 +00:00
|
|
|
}
|
|
|
|
}
|
2016-02-20 23:50:41 +00:00
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// This test ensures that batch jobs are GC'd in one shot, meaning it all
|
|
|
|
// allocs/evals and job or nothing
|
|
|
|
func TestCoreScheduler_JobGC_OneShot(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-06-27 22:47:49 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-06-27 22:47:49 +00:00
|
|
|
// Insert job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert two complete evals
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.JobID = job.ID
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
|
|
|
|
eval2 := mock.Eval()
|
|
|
|
eval2.JobID = job.ID
|
|
|
|
eval2.Status = structs.EvalStatusComplete
|
|
|
|
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert one complete alloc and one running on distinct evals
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.EvalID = eval2.ID
|
|
|
|
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
|
|
|
|
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Force the jobs state to dead
|
|
|
|
job.Status = structs.JobStatusDead
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still exist
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE, err := state.EvalByID(ws, eval.ID)
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE == nil {
|
|
|
|
t.Fatalf("bad: %v", outE)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outE2, err := state.EvalByID(ws, eval2.ID)
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outE2)
|
|
|
|
}
|
|
|
|
|
2017-02-08 05:22:48 +00:00
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA == nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
2017-02-08 05:22:48 +00:00
|
|
|
outA2, err := state.AllocByID(ws, alloc2.ID)
|
2016-06-27 22:47:49 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA2 == nil {
|
|
|
|
t.Fatalf("bad: %v", outA2)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-15 23:47:19 +00:00
|
|
|
// This test ensures that stopped jobs are GCd
|
|
|
|
func TestCoreScheduler_JobGC_Stopped(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-04-15 23:47:19 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Stop = true
|
2018-01-22 22:31:38 +00:00
|
|
|
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
|
|
|
Attempts: 0,
|
|
|
|
Interval: 0 * time.Second,
|
|
|
|
}
|
2017-04-15 23:47:19 +00:00
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert two complete evals
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.JobID = job.ID
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
|
|
|
|
eval2 := mock.Eval()
|
|
|
|
eval2.JobID = job.ID
|
|
|
|
eval2.Status = structs.EvalStatusComplete
|
|
|
|
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert one complete alloc
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.JobID = job.ID
|
|
|
|
alloc.EvalID = eval.ID
|
|
|
|
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
2018-01-22 22:31:38 +00:00
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
2017-04-15 23:47:19 +00:00
|
|
|
err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shouldn't still exist
|
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2017-04-15 23:47:19 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
outE, err := state.EvalByID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE != nil {
|
|
|
|
t.Fatalf("bad: %v", outE)
|
|
|
|
}
|
|
|
|
|
|
|
|
outE2, err := state.EvalByID(ws, eval2.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE2 != nil {
|
|
|
|
t.Fatalf("bad: %v", outE2)
|
|
|
|
}
|
|
|
|
|
|
|
|
outA, err := state.AllocByID(ws, alloc.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outA != nil {
|
|
|
|
t.Fatalf("bad: %v", outA)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-20 23:50:41 +00:00
|
|
|
func TestCoreScheduler_JobGC_Force(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2017-10-23 22:04:00 +00:00
|
|
|
for _, withAcl := range []bool{false, true} {
|
|
|
|
t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
|
|
|
|
var server *Server
|
2019-12-04 00:15:11 +00:00
|
|
|
var cleanup func()
|
2017-10-23 22:04:00 +00:00
|
|
|
if withAcl {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, _, cleanup = TestACLServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
} else {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, cleanup = TestServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
}
|
2019-12-04 00:15:11 +00:00
|
|
|
defer cleanup()
|
2017-10-23 22:04:00 +00:00
|
|
|
testutil.WaitForLeader(t, server.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert job.
|
|
|
|
state := server.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusDead
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a terminal eval
|
|
|
|
eval := mock.Eval()
|
|
|
|
eval.JobID = job.ID
|
|
|
|
eval.Status = structs.EvalStatusComplete
|
|
|
|
err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(server, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := server.coreJobEval(structs.CoreJobForceGC, 1002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shouldn't still exist
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
outE, err := state.EvalByID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if outE != nil {
|
|
|
|
t.Fatalf("bad: %v", outE)
|
|
|
|
}
|
|
|
|
})
|
2016-02-20 23:50:41 +00:00
|
|
|
}
|
|
|
|
}
|
2016-03-30 22:17:13 +00:00
|
|
|
|
2017-04-15 23:47:19 +00:00
|
|
|
// This test ensures parameterized jobs only get gc'd when stopped
|
|
|
|
func TestCoreScheduler_JobGC_Parameterized(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-01-26 19:57:32 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert a parameterized job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.Job()
|
|
|
|
job.Type = structs.JobTypeBatch
|
|
|
|
job.Status = structs.JobStatusRunning
|
|
|
|
job.ParameterizedJob = &structs.ParameterizedJobConfig{
|
|
|
|
Payload: structs.DispatchPayloadRequired,
|
|
|
|
}
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2017-04-15 23:47:19 +00:00
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still exist
|
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2017-04-15 23:47:19 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark the job as stopped and try again
|
|
|
|
job2 := job.Copy()
|
|
|
|
job2.Stop = true
|
|
|
|
err = state.UpsertJob(2000, job2)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err = state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core = NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc = s1.coreJobEval(structs.CoreJobForceGC, 2002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should not exist
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err = state.JobByID(ws, job.Namespace, job.ID)
|
2017-04-15 23:47:19 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %+v", out)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-11 17:36:28 +00:00
|
|
|
// This test ensures periodic jobs don't get GCd until they are stopped
|
2017-04-15 23:47:19 +00:00
|
|
|
func TestCoreScheduler_JobGC_Periodic(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2017-04-15 23:47:19 +00:00
|
|
|
|
2019-12-04 00:15:11 +00:00
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-04-15 23:47:19 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert a parameterized job.
|
|
|
|
state := s1.fsm.State()
|
|
|
|
job := mock.PeriodicJob()
|
|
|
|
err := state.UpsertJob(1000, job)
|
|
|
|
if err != nil {
|
2017-01-26 19:57:32 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should still exist
|
2017-02-08 05:22:48 +00:00
|
|
|
ws := memdb.NewWatchSet()
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err := state.JobByID(ws, job.Namespace, job.ID)
|
2017-01-26 19:57:32 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
t.Fatalf("bad: %v", out)
|
|
|
|
}
|
|
|
|
|
2017-04-15 23:47:19 +00:00
|
|
|
// Mark the job as stopped and try again
|
|
|
|
job2 := job.Copy()
|
|
|
|
job2.Stop = true
|
|
|
|
err = state.UpsertJob(2000, job2)
|
2017-01-26 19:57:32 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2017-04-15 23:47:19 +00:00
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err = state.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core = NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc = s1.coreJobEval(structs.CoreJobForceGC, 2002)
|
|
|
|
err = core.Process(gc)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Should not exist
|
2017-09-07 23:56:15 +00:00
|
|
|
out, err = state.JobByID(ws, job.Namespace, job.ID)
|
2017-04-15 23:47:19 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
t.Fatalf("bad: %+v", out)
|
2017-01-26 19:57:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-29 19:32:37 +00:00
|
|
|
func TestCoreScheduler_DeploymentGC(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-06-29 19:32:37 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2018-03-11 17:57:49 +00:00
|
|
|
// Insert an active, terminal, and terminal with allocations deployment
|
2017-06-29 19:32:37 +00:00
|
|
|
state := s1.fsm.State()
|
2017-07-14 20:02:39 +00:00
|
|
|
d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
|
2017-06-29 19:32:37 +00:00
|
|
|
d1.Status = structs.DeploymentStatusFailed
|
2017-07-14 20:02:39 +00:00
|
|
|
d3.Status = structs.DeploymentStatusSuccessful
|
2017-07-04 20:31:01 +00:00
|
|
|
assert.Nil(state.UpsertDeployment(1000, d1), "UpsertDeployment")
|
|
|
|
assert.Nil(state.UpsertDeployment(1001, d2), "UpsertDeployment")
|
2017-07-14 20:02:39 +00:00
|
|
|
assert.Nil(state.UpsertDeployment(1002, d3), "UpsertDeployment")
|
|
|
|
|
|
|
|
a := mock.Alloc()
|
|
|
|
a.JobID = d3.JobID
|
|
|
|
a.DeploymentID = d3.ID
|
|
|
|
assert.Nil(state.UpsertAllocs(1003, []*structs.Allocation{a}), "UpsertAllocs")
|
2017-06-29 19:32:37 +00:00
|
|
|
|
|
|
|
// Update the time tables to make this work
|
|
|
|
tt := s1.fsm.TimeTable()
|
|
|
|
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.DeploymentGCThreshold))
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
assert.Nil(err, "Snapshot")
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := s1.coreJobEval(structs.CoreJobDeploymentGC, 2000)
|
|
|
|
assert.Nil(core.Process(gc), "Process GC")
|
|
|
|
|
|
|
|
// Should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.DeploymentByID(ws, d1.ID)
|
|
|
|
assert.Nil(err, "DeploymentByID")
|
|
|
|
assert.Nil(out, "Terminal Deployment")
|
|
|
|
out2, err := state.DeploymentByID(ws, d2.ID)
|
|
|
|
assert.Nil(err, "DeploymentByID")
|
|
|
|
assert.NotNil(out2, "Active Deployment")
|
2017-07-14 20:02:39 +00:00
|
|
|
out3, err := state.DeploymentByID(ws, d3.ID)
|
|
|
|
assert.Nil(err, "DeploymentByID")
|
|
|
|
assert.NotNil(out3, "Terminal Deployment With Allocs")
|
2017-06-29 19:32:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestCoreScheduler_DeploymentGC_Force(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2017-10-23 22:04:00 +00:00
|
|
|
for _, withAcl := range []bool{false, true} {
|
|
|
|
t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
|
|
|
|
var server *Server
|
2019-12-04 00:15:11 +00:00
|
|
|
var cleanup func()
|
2017-10-23 22:04:00 +00:00
|
|
|
if withAcl {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, _, cleanup = TestACLServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
} else {
|
2019-12-04 00:15:11 +00:00
|
|
|
server, cleanup = TestServer(t, nil)
|
2017-10-23 22:04:00 +00:00
|
|
|
}
|
2019-12-04 00:15:11 +00:00
|
|
|
defer cleanup()
|
2017-10-23 22:04:00 +00:00
|
|
|
testutil.WaitForLeader(t, server.RPC)
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Insert terminal and active deployment
|
|
|
|
state := server.fsm.State()
|
|
|
|
d1, d2 := mock.Deployment(), mock.Deployment()
|
|
|
|
d1.Status = structs.DeploymentStatusFailed
|
|
|
|
assert.Nil(state.UpsertDeployment(1000, d1), "UpsertDeployment")
|
|
|
|
assert.Nil(state.UpsertDeployment(1001, d2), "UpsertDeployment")
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := state.Snapshot()
|
|
|
|
assert.Nil(err, "Snapshot")
|
|
|
|
core := NewCoreScheduler(server, snap)
|
|
|
|
|
|
|
|
// Attempt the GC
|
|
|
|
gc := server.coreJobEval(structs.CoreJobForceGC, 1000)
|
|
|
|
assert.Nil(core.Process(gc), "Process Force GC")
|
|
|
|
|
|
|
|
// Should be gone
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := state.DeploymentByID(ws, d1.ID)
|
|
|
|
assert.Nil(err, "DeploymentByID")
|
|
|
|
assert.Nil(out, "Terminal Deployment")
|
|
|
|
out2, err := state.DeploymentByID(ws, d2.ID)
|
|
|
|
assert.Nil(err, "DeploymentByID")
|
|
|
|
assert.NotNil(out2, "Active Deployment")
|
|
|
|
})
|
|
|
|
}
|
2017-06-29 19:32:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestCoreScheduler_PartitionEvalReap(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2016-03-30 22:17:13 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
2016-08-11 21:36:22 +00:00
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
2016-03-30 22:17:13 +00:00
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := s1.fsm.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Set the max ids per reap to something lower.
|
|
|
|
maxIdsPerReap = 2
|
|
|
|
|
|
|
|
evals := []string{"a", "b", "c"}
|
|
|
|
allocs := []string{"1", "2", "3"}
|
2017-06-29 19:32:37 +00:00
|
|
|
requests := core.(*CoreScheduler).partitionEvalReap(evals, allocs)
|
2016-03-30 22:17:13 +00:00
|
|
|
if len(requests) != 3 {
|
|
|
|
t.Fatalf("Expected 3 requests got: %v", requests)
|
|
|
|
}
|
|
|
|
|
|
|
|
first := requests[0]
|
2016-04-14 18:41:04 +00:00
|
|
|
if len(first.Allocs) != 2 && len(first.Evals) != 0 {
|
2016-03-30 22:17:13 +00:00
|
|
|
t.Fatalf("Unexpected first request: %v", first)
|
|
|
|
}
|
|
|
|
|
|
|
|
second := requests[1]
|
2016-04-14 18:41:04 +00:00
|
|
|
if len(second.Allocs) != 1 && len(second.Evals) != 1 {
|
2016-03-30 22:17:13 +00:00
|
|
|
t.Fatalf("Unexpected second request: %v", second)
|
|
|
|
}
|
|
|
|
|
|
|
|
third := requests[2]
|
2016-04-14 18:41:04 +00:00
|
|
|
if len(third.Allocs) != 0 && len(third.Evals) != 2 {
|
2016-03-30 22:17:13 +00:00
|
|
|
t.Fatalf("Unexpected third request: %v", third)
|
|
|
|
}
|
|
|
|
}
|
2017-06-29 19:32:37 +00:00
|
|
|
|
|
|
|
func TestCoreScheduler_PartitionDeploymentReap(t *testing.T) {
|
2017-07-23 22:04:38 +00:00
|
|
|
t.Parallel()
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2017-06-29 19:32:37 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
|
|
|
|
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := s1.fsm.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Set the max ids per reap to something lower.
|
|
|
|
maxIdsPerReap = 2
|
|
|
|
|
|
|
|
deployments := []string{"a", "b", "c"}
|
|
|
|
requests := core.(*CoreScheduler).partitionDeploymentReap(deployments)
|
|
|
|
if len(requests) != 2 {
|
|
|
|
t.Fatalf("Expected 2 requests got: %v", requests)
|
|
|
|
}
|
|
|
|
|
|
|
|
first := requests[0]
|
|
|
|
if len(first.Deployments) != 2 {
|
|
|
|
t.Fatalf("Unexpected first request: %v", first)
|
|
|
|
}
|
|
|
|
|
|
|
|
second := requests[1]
|
|
|
|
if len(second.Deployments) != 1 {
|
|
|
|
t.Fatalf("Unexpected second request: %v", second)
|
|
|
|
}
|
|
|
|
}
|
2018-01-30 15:12:14 +00:00
|
|
|
|
2018-03-14 23:06:37 +00:00
|
|
|
func TestCoreScheduler_PartitionJobReap(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
require := require.New(t)
|
2019-12-04 00:15:11 +00:00
|
|
|
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
|
|
defer cleanupS1()
|
2018-03-14 23:06:37 +00:00
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
|
|
|
|
// Create a core scheduler
|
|
|
|
snap, err := s1.fsm.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
core := NewCoreScheduler(s1, snap)
|
|
|
|
|
|
|
|
// Set the max ids per reap to something lower.
|
|
|
|
maxIdsPerReap = 2
|
|
|
|
|
|
|
|
jobs := []*structs.Job{mock.Job(), mock.Job(), mock.Job()}
|
|
|
|
requests := core.(*CoreScheduler).partitionJobReap(jobs, "")
|
|
|
|
require.Len(requests, 2)
|
|
|
|
|
|
|
|
first := requests[0]
|
|
|
|
second := requests[1]
|
|
|
|
require.Len(first.Jobs, 2)
|
|
|
|
require.Len(second.Jobs, 1)
|
|
|
|
}
|
|
|
|
|
2018-01-30 15:12:14 +00:00
|
|
|
// Tests various scenarios when allocations are eligible to be GCed
|
|
|
|
func TestAllocation_GCEligible(t *testing.T) {
|
|
|
|
type testCase struct {
|
2018-11-01 05:02:26 +00:00
|
|
|
Desc string
|
|
|
|
GCTime time.Time
|
|
|
|
ClientStatus string
|
|
|
|
DesiredStatus string
|
|
|
|
JobStatus string
|
|
|
|
JobStop bool
|
|
|
|
AllocJobModifyIndex uint64
|
|
|
|
JobModifyIndex uint64
|
|
|
|
ModifyIndex uint64
|
|
|
|
NextAllocID string
|
|
|
|
ReschedulePolicy *structs.ReschedulePolicy
|
|
|
|
RescheduleTrackers []*structs.RescheduleEvent
|
|
|
|
ThresholdIndex uint64
|
|
|
|
ShouldGC bool
|
2018-01-30 15:12:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fail := time.Now()
|
|
|
|
|
|
|
|
harness := []testCase{
|
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when non terminal",
|
2018-01-30 15:12:14 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
2018-01-30 22:14:53 +00:00
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when non terminal and job stopped",
|
2018-01-30 22:14:53 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
JobStop: true,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when non terminal and job dead",
|
2018-01-30 22:14:53 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
JobStatus: structs.JobStatusDead,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
2018-12-05 21:01:12 +00:00
|
|
|
{
|
|
|
|
Desc: "Don't GC when non terminal on client and job dead",
|
|
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
|
|
JobStatus: structs.JobStatusDead,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
2018-01-30 15:12:14 +00:00
|
|
|
{
|
2018-04-11 18:58:02 +00:00
|
|
|
Desc: "GC when terminal but not failed ",
|
|
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-04-11 20:12:23 +00:00
|
|
|
ModifyIndex: 90,
|
2018-04-11 18:58:02 +00:00
|
|
|
ThresholdIndex: 90,
|
|
|
|
ReschedulePolicy: nil,
|
2018-04-11 20:12:23 +00:00
|
|
|
ShouldGC: true,
|
2018-04-11 18:58:02 +00:00
|
|
|
},
|
2018-01-30 15:12:14 +00:00
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when threshold not met",
|
2018-01-30 15:12:14 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 100,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ReschedulePolicy: nil,
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC when no reschedule policy",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
|
|
|
ReschedulePolicy: nil,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC when empty policy",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 0, Interval: 0 * time.Minute},
|
2018-01-30 15:12:14 +00:00
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when no previous reschedule attempts",
|
2018-01-30 15:12:14 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 1, Interval: 1 * time.Minute},
|
2018-01-30 15:12:14 +00:00
|
|
|
ShouldGC: false,
|
|
|
|
},
|
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when prev reschedule attempt within interval",
|
2018-01-30 15:12:14 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 2, Interval: 30 * time.Minute},
|
2018-01-30 15:12:14 +00:00
|
|
|
GCTime: fail,
|
|
|
|
ModifyIndex: 90,
|
|
|
|
ThresholdIndex: 90,
|
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
ShouldGC: false,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC with prev reschedule attempt outside interval",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
|
2018-01-30 15:12:14 +00:00
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC when next alloc id is set",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
|
2018-01-30 15:12:14 +00:00
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
NextAllocID: uuid.Generate(),
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
2018-04-11 18:58:02 +00:00
|
|
|
{
|
2018-04-11 20:12:23 +00:00
|
|
|
Desc: "Don't GC when next alloc id is not set and unlimited restarts",
|
2018-04-11 18:58:02 +00:00
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "constant"},
|
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
ShouldGC: false,
|
2018-01-30 15:12:14 +00:00
|
|
|
},
|
2018-01-30 22:14:53 +00:00
|
|
|
{
|
|
|
|
Desc: "GC when job is stopped",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
|
2018-01-30 22:14:53 +00:00
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
JobStop: true,
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC when job status is dead",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
|
|
GCTime: fail,
|
2018-02-28 18:21:27 +00:00
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
|
2018-01-30 22:14:53 +00:00
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
JobStatus: structs.JobStatusDead,
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
2018-05-21 18:28:31 +00:00
|
|
|
{
|
|
|
|
Desc: "GC when desired status is stop, unlimited reschedule policy, no previous reschedule events",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
|
|
GCTime: fail,
|
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "constant"},
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Desc: "GC when desired status is stop, limited reschedule policy, some previous reschedule events",
|
|
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
|
|
GCTime: fail,
|
|
|
|
ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
|
|
|
|
RescheduleTrackers: []*structs.RescheduleEvent{
|
|
|
|
{
|
|
|
|
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
ShouldGC: true,
|
|
|
|
},
|
2018-01-30 15:12:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, tc := range harness {
|
|
|
|
alloc := &structs.Allocation{}
|
|
|
|
alloc.ModifyIndex = tc.ModifyIndex
|
|
|
|
alloc.DesiredStatus = tc.DesiredStatus
|
|
|
|
alloc.ClientStatus = tc.ClientStatus
|
2018-02-28 18:21:27 +00:00
|
|
|
alloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.RescheduleTrackers}
|
2018-01-30 22:14:53 +00:00
|
|
|
alloc.NextAllocation = tc.NextAllocID
|
|
|
|
job := mock.Job()
|
|
|
|
alloc.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy
|
|
|
|
if tc.JobStatus != "" {
|
|
|
|
job.Status = tc.JobStatus
|
|
|
|
}
|
|
|
|
job.Stop = tc.JobStop
|
2018-01-30 15:12:14 +00:00
|
|
|
|
|
|
|
t.Run(tc.Desc, func(t *testing.T) {
|
2018-01-30 22:14:53 +00:00
|
|
|
if got := allocGCEligible(alloc, job, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
|
2018-01-30 15:12:14 +00:00
|
|
|
t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
2018-01-30 22:14:53 +00:00
|
|
|
|
|
|
|
// Verify nil job
|
|
|
|
require := require.New(t)
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
alloc.ClientStatus = structs.AllocClientStatusComplete
|
|
|
|
require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
|
2018-01-30 15:12:14 +00:00
|
|
|
}
|
2020-02-19 14:05:33 +00:00
|
|
|
|
2020-04-02 20:04:56 +00:00
|
|
|
func TestCSI_GCVolumeClaims_Collection(t *testing.T) {
|
2020-02-19 14:05:33 +00:00
|
|
|
t.Parallel()
|
2020-04-02 20:04:56 +00:00
|
|
|
srv, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
|
|
|
|
defer shutdownSrv()
|
2020-03-16 19:59:42 +00:00
|
|
|
testutil.WaitForLeader(t, srv.RPC)
|
|
|
|
|
|
|
|
state := srv.fsm.State()
|
|
|
|
ws := memdb.NewWatchSet()
|
2020-04-02 20:04:56 +00:00
|
|
|
index := uint64(100)
|
2020-03-16 19:59:42 +00:00
|
|
|
|
|
|
|
// Create a client node, plugin, and volume
|
|
|
|
node := mock.Node()
|
|
|
|
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
|
|
|
|
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
|
|
|
"csi-plugin-example": {
|
|
|
|
PluginID: "csi-plugin-example",
|
|
|
|
Healthy: true,
|
|
|
|
RequiresControllerPlugin: true,
|
|
|
|
NodeInfo: &structs.CSINodeInfo{},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
|
|
|
|
"csi-plugin-example": {
|
|
|
|
PluginID: "csi-plugin-example",
|
|
|
|
Healthy: true,
|
|
|
|
RequiresControllerPlugin: true,
|
|
|
|
ControllerInfo: &structs.CSIControllerInfo{
|
|
|
|
SupportsReadOnlyAttach: true,
|
|
|
|
SupportsAttachDetach: true,
|
|
|
|
SupportsListVolumes: true,
|
|
|
|
SupportsListVolumesAttachedNodes: false,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
err := state.UpsertNode(99, node)
|
|
|
|
require.NoError(t, err)
|
|
|
|
volId0 := uuid.Generate()
|
2020-03-17 21:32:39 +00:00
|
|
|
ns := structs.DefaultNamespace
|
2020-03-16 19:59:42 +00:00
|
|
|
vols := []*structs.CSIVolume{{
|
|
|
|
ID: volId0,
|
2020-03-17 21:32:39 +00:00
|
|
|
Namespace: ns,
|
2020-03-16 19:59:42 +00:00
|
|
|
PluginID: "csi-plugin-example",
|
|
|
|
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
|
|
|
|
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
|
|
|
|
}}
|
2020-04-02 20:04:56 +00:00
|
|
|
|
|
|
|
err = state.CSIVolumeRegister(index, vols)
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
2020-03-17 21:32:39 +00:00
|
|
|
vol, err := state.CSIVolumeByID(ws, ns, volId0)
|
2020-03-16 19:59:42 +00:00
|
|
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.True(t, vol.ControllerRequired)
|
|
|
|
require.Len(t, vol.ReadAllocs, 0)
|
|
|
|
require.Len(t, vol.WriteAllocs, 0)
|
|
|
|
|
|
|
|
// Create a job with 2 allocations
|
|
|
|
job := mock.Job()
|
|
|
|
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
|
|
|
|
"_": {
|
|
|
|
Name: "someVolume",
|
|
|
|
Type: structs.VolumeTypeCSI,
|
|
|
|
Source: volId0,
|
|
|
|
ReadOnly: false,
|
|
|
|
},
|
|
|
|
}
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.UpsertJob(index, job)
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
alloc1 := mock.Alloc()
|
|
|
|
alloc1.JobID = job.ID
|
|
|
|
alloc1.NodeID = node.ID
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.UpsertJobSummary(index, mock.JobSummary(alloc1.JobID))
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
alloc1.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
|
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.JobID = job.ID
|
|
|
|
alloc2.NodeID = node.ID
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.UpsertJobSummary(index, mock.JobSummary(alloc2.JobID))
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
alloc2.TaskGroup = job.TaskGroups[0].Name
|
|
|
|
|
|
|
|
err = state.UpsertAllocs(104, []*structs.Allocation{alloc1, alloc2})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Claim the volumes and verify the claims were set
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.CSIVolumeClaim(index, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.CSIVolumeClaim(index, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
|
|
|
|
index++
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
2020-03-17 21:32:39 +00:00
|
|
|
vol, err = state.CSIVolumeByID(ws, ns, volId0)
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, vol.ReadAllocs, 1)
|
|
|
|
require.Len(t, vol.WriteAllocs, 1)
|
|
|
|
|
|
|
|
// Update both allocs as failed/terminated
|
|
|
|
alloc1.ClientStatus = structs.AllocClientStatusFailed
|
|
|
|
alloc2.ClientStatus = structs.AllocClientStatusFailed
|
2020-04-02 20:04:56 +00:00
|
|
|
err = state.UpdateAllocsFromClient(index, []*structs.Allocation{alloc1, alloc2})
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2020-04-02 20:04:56 +00:00
|
|
|
vol, err = state.CSIVolumeDenormalize(ws, vol)
|
2020-03-16 19:59:42 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2020-04-03 21:37:26 +00:00
|
|
|
gcClaims, nodeClaims := collectClaimsToGCImpl(vol, false)
|
2020-04-02 20:04:56 +00:00
|
|
|
require.Equal(t, nodeClaims[node.ID], 2)
|
|
|
|
require.Len(t, gcClaims, 2)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
require := require.New(t)
|
|
|
|
|
|
|
|
s, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
|
|
|
|
defer shutdownSrv()
|
|
|
|
testutil.WaitForLeader(t, s.RPC)
|
|
|
|
|
|
|
|
node := mock.Node()
|
|
|
|
plugin := mock.CSIPlugin()
|
|
|
|
vol := mock.CSIVolume(plugin)
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
|
|
|
|
cases := []struct {
|
|
|
|
Name string
|
|
|
|
Claim gcClaimRequest
|
|
|
|
ClaimsCount map[string]int
|
|
|
|
ControllerRequired bool
|
|
|
|
ExpectedErr string
|
|
|
|
ExpectedCount int
|
|
|
|
ExpectedClaimsCount int
|
|
|
|
ExpectedNodeDetachVolumeCount int
|
|
|
|
ExpectedControllerDetachVolumeCount int
|
|
|
|
ExpectedVolumeClaimCount int
|
|
|
|
srv *MockRPCServer
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
Name: "NodeDetachVolume fails",
|
|
|
|
Claim: gcClaimRequest{
|
|
|
|
allocID: alloc.ID,
|
|
|
|
nodeID: node.ID,
|
|
|
|
mode: structs.CSIVolumeClaimRead,
|
|
|
|
},
|
|
|
|
ClaimsCount: map[string]int{node.ID: 1},
|
|
|
|
ControllerRequired: true,
|
|
|
|
ExpectedErr: "node plugin missing",
|
|
|
|
ExpectedClaimsCount: 1,
|
|
|
|
ExpectedNodeDetachVolumeCount: 1,
|
|
|
|
srv: &MockRPCServer{
|
|
|
|
state: s.State(),
|
|
|
|
nextCSINodeDetachVolumeError: fmt.Errorf("node plugin missing"),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Name: "ControllerDetachVolume no controllers",
|
|
|
|
Claim: gcClaimRequest{
|
|
|
|
allocID: alloc.ID,
|
|
|
|
nodeID: node.ID,
|
|
|
|
mode: structs.CSIVolumeClaimRead,
|
|
|
|
},
|
|
|
|
ClaimsCount: map[string]int{node.ID: 1},
|
|
|
|
ControllerRequired: true,
|
|
|
|
ExpectedErr: fmt.Sprintf(
|
2020-04-04 15:03:44 +00:00
|
|
|
"Unknown node: %s", node.ID),
|
2020-04-02 20:04:56 +00:00
|
|
|
ExpectedClaimsCount: 0,
|
|
|
|
ExpectedNodeDetachVolumeCount: 1,
|
|
|
|
ExpectedControllerDetachVolumeCount: 0,
|
|
|
|
srv: &MockRPCServer{
|
|
|
|
state: s.State(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Name: "ControllerDetachVolume node-only",
|
|
|
|
Claim: gcClaimRequest{
|
|
|
|
allocID: alloc.ID,
|
|
|
|
nodeID: node.ID,
|
|
|
|
mode: structs.CSIVolumeClaimRead,
|
|
|
|
},
|
|
|
|
ClaimsCount: map[string]int{node.ID: 1},
|
|
|
|
ControllerRequired: false,
|
|
|
|
ExpectedClaimsCount: 0,
|
|
|
|
ExpectedNodeDetachVolumeCount: 1,
|
|
|
|
ExpectedControllerDetachVolumeCount: 0,
|
|
|
|
ExpectedVolumeClaimCount: 1,
|
|
|
|
srv: &MockRPCServer{
|
|
|
|
state: s.State(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, tc := range cases {
|
|
|
|
t.Run(tc.Name, func(t *testing.T) {
|
|
|
|
vol.ControllerRequired = tc.ControllerRequired
|
|
|
|
nodeClaims, err := volumeClaimReapImpl(tc.srv, &volumeClaimReapArgs{
|
|
|
|
vol: vol,
|
|
|
|
plug: plugin,
|
|
|
|
allocID: tc.Claim.allocID,
|
|
|
|
nodeID: tc.Claim.nodeID,
|
|
|
|
mode: tc.Claim.mode,
|
|
|
|
region: "global",
|
|
|
|
namespace: "default",
|
|
|
|
leaderACL: "not-in-use",
|
|
|
|
nodeClaims: tc.ClaimsCount,
|
|
|
|
})
|
|
|
|
if tc.ExpectedErr != "" {
|
|
|
|
require.EqualError(err, tc.ExpectedErr)
|
|
|
|
} else {
|
|
|
|
require.NoError(err)
|
|
|
|
}
|
|
|
|
require.Equal(tc.ExpectedClaimsCount,
|
|
|
|
nodeClaims[tc.Claim.nodeID], "expected claims")
|
|
|
|
require.Equal(tc.ExpectedNodeDetachVolumeCount,
|
|
|
|
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
|
|
|
|
require.Equal(tc.ExpectedControllerDetachVolumeCount,
|
|
|
|
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
|
|
|
|
require.Equal(tc.ExpectedVolumeClaimCount,
|
|
|
|
tc.srv.countCSIVolumeClaim, "volume claim RPC count")
|
|
|
|
})
|
|
|
|
}
|
2020-03-16 19:59:42 +00:00
|
|
|
}
|
2020-04-02 20:04:56 +00:00
|
|
|
|
|
|
|
type MockRPCServer struct {
|
|
|
|
state *state.StateStore
|
|
|
|
|
|
|
|
// mock responses for ClientCSI.NodeDetachVolume
|
|
|
|
nextCSINodeDetachVolumeResponse *cstructs.ClientCSINodeDetachVolumeResponse
|
|
|
|
nextCSINodeDetachVolumeError error
|
|
|
|
countCSINodeDetachVolume int
|
|
|
|
|
|
|
|
// mock responses for ClientCSI.ControllerDetachVolume
|
|
|
|
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
|
|
|
|
nextCSIControllerDetachVolumeError error
|
|
|
|
countCSIControllerDetachVolume int
|
|
|
|
|
|
|
|
// mock responses for CSI.VolumeClaim
|
|
|
|
nextCSIVolumeClaimResponse *structs.CSIVolumeClaimResponse
|
|
|
|
nextCSIVolumeClaimError error
|
|
|
|
countCSIVolumeClaim int
|
|
|
|
}
|
|
|
|
|
|
|
|
func (srv *MockRPCServer) RPC(method string, args interface{}, reply interface{}) error {
|
|
|
|
switch method {
|
|
|
|
case "ClientCSI.NodeDetachVolume":
|
|
|
|
reply = srv.nextCSINodeDetachVolumeResponse
|
|
|
|
srv.countCSINodeDetachVolume++
|
|
|
|
return srv.nextCSINodeDetachVolumeError
|
|
|
|
case "ClientCSI.ControllerDetachVolume":
|
|
|
|
reply = srv.nextCSIControllerDetachVolumeResponse
|
|
|
|
srv.countCSIControllerDetachVolume++
|
|
|
|
return srv.nextCSIControllerDetachVolumeError
|
|
|
|
case "CSIVolume.Claim":
|
|
|
|
reply = srv.nextCSIVolumeClaimResponse
|
|
|
|
srv.countCSIVolumeClaim++
|
|
|
|
return srv.nextCSIVolumeClaimError
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("unexpected method %q passed to mock", method)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// State returns the state store the mock was configured with.
func (srv *MockRPCServer) State() *state.StateStore {
	return srv.state
}
|