Consider dead job status during eval GC and fix unit test setup for correctness
This commit is contained in:
parent
4fd2691323
commit
28d2439810
|
@ -258,16 +258,7 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
|
|||
// Job doesn't exist
|
||||
// Job is Stopped and dead
|
||||
// allowBatch and the job is dead
|
||||
collect := false
|
||||
if job == nil {
|
||||
collect = true
|
||||
} else if job.Status != structs.JobStatusDead {
|
||||
collect = false
|
||||
} else if job.Stop {
|
||||
collect = true
|
||||
} else if allowBatch {
|
||||
collect = true
|
||||
}
|
||||
collect := shouldCollect(job, allowBatch)
|
||||
|
||||
// We don't want to gc anything related to a job which is not dead
|
||||
// If the batch job doesn't exist we can GC it regardless of allowBatch
|
||||
|
@ -288,10 +279,8 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
|
|||
gcEval := true
|
||||
var gcAllocIDs []string
|
||||
for _, alloc := range allocs {
|
||||
if job == nil || job.Stop {
|
||||
// Eligible to be GC'd because the job is not around or stopped
|
||||
// We don't consider jobs with "dead" status here because it may still
|
||||
// have terminal allocs that are reschedulable
|
||||
if job == nil || job.Stop || job.Status == structs.JobStatusDead {
|
||||
// Eligible to be GC'd because the job is not around, stopped or dead
|
||||
gcAllocIDs = append(gcAllocIDs, alloc.ID)
|
||||
continue
|
||||
}
|
||||
|
@ -314,6 +303,21 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
|
|||
return gcEval, gcAllocIDs, nil
|
||||
}
|
||||
|
||||
// shouldCollect is a helper function that determines whether the job is eligible for GC
|
||||
func shouldCollect(job *structs.Job, allowBatch bool) bool {
|
||||
collect := false
|
||||
if job == nil {
|
||||
collect = true
|
||||
} else if job.Status != structs.JobStatusDead {
|
||||
collect = false
|
||||
} else if job.Stop {
|
||||
collect = true
|
||||
} else if allowBatch {
|
||||
collect = true
|
||||
}
|
||||
return collect
|
||||
}
|
||||
|
||||
// evalReap contacts the leader and issues a reap on the passed evals and
|
||||
// allocs.
|
||||
func (c *CoreScheduler) evalReap(evals, allocs []string) error {
|
||||
|
|
|
@ -125,6 +125,13 @@ func TestCoreScheduler_EvalGC_ReshedulingAllocs(t *testing.T) {
|
|||
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
|
||||
require.Nil(err)
|
||||
|
||||
// Insert "pending" eval for same job
|
||||
eval2 := mock.Eval()
|
||||
eval2.JobID = eval.JobID
|
||||
state.UpsertJobSummary(999, mock.JobSummary(eval2.JobID))
|
||||
err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
|
||||
require.Nil(err)
|
||||
|
||||
// Insert mock job with default reschedule policy of 2 in 10 minutes
|
||||
job := mock.Job()
|
||||
job.ID = eval.JobID
|
||||
|
@ -179,7 +186,7 @@ func TestCoreScheduler_EvalGC_ReshedulingAllocs(t *testing.T) {
|
|||
}
|
||||
core := NewCoreScheduler(s1, snap)
|
||||
|
||||
// Attempt the GC
|
||||
// Attempt the GC, job has all terminal allocs and one pending eval
|
||||
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
|
||||
err = core.Process(gc)
|
||||
require.Nil(err)
|
||||
|
@ -492,18 +499,13 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
|
|||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Insert mock job with rescheduling disabled
|
||||
// Create mock job with id same as eval
|
||||
job := mock.Job()
|
||||
job.ID = eval.JobID
|
||||
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
||||
Attempts: 0,
|
||||
Interval: 0 * time.Second,
|
||||
}
|
||||
err = state.UpsertJob(1001, job)
|
||||
require.Nil(err)
|
||||
|
||||
// Insert "dead" alloc
|
||||
alloc := mock.Alloc()
|
||||
alloc.JobID = job.ID
|
||||
alloc.EvalID = eval.ID
|
||||
alloc.DesiredStatus = structs.AllocDesiredStatusStop
|
||||
alloc.TaskGroup = job.TaskGroups[0].Name
|
||||
|
@ -511,7 +513,7 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
|
|||
|
||||
// Insert "lost" alloc
|
||||
alloc2 := mock.Alloc()
|
||||
alloc2.JobID = alloc.JobID
|
||||
alloc2.JobID = job.ID
|
||||
alloc2.EvalID = eval.ID
|
||||
alloc2.TaskGroup = job.TaskGroups[0].Name
|
||||
alloc2.DesiredStatus = structs.AllocDesiredStatusRun
|
||||
|
@ -525,12 +527,21 @@ func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
|
|||
// Insert "running" alloc
|
||||
alloc3 := mock.Alloc()
|
||||
alloc3.EvalID = eval.ID
|
||||
alloc3.JobID = job.ID
|
||||
state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID))
|
||||
err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3})
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Insert mock job with rescheduling disabled
|
||||
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
|
||||
Attempts: 0,
|
||||
Interval: 0 * time.Second,
|
||||
}
|
||||
err = state.UpsertJob(1001, job)
|
||||
require.Nil(err)
|
||||
|
||||
// Update the time tables to make this work
|
||||
tt := s1.fsm.TimeTable()
|
||||
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
|
||||
|
|
Loading…
Reference in New Issue