Use next alloc ID being set, move gcEligible outside the structs package, and other code review feedback
parent 009df8b986
commit 4fd2691323

@@ -288,13 +288,20 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
     gcEval := true
+    var gcAllocIDs []string
     for _, alloc := range allocs {
+        if job == nil || job.Stop {
+            // Eligible to be GC'd because the job is not around or stopped
+            // We don't consider jobs with "dead" status here because it may still
+            // have terminal allocs that are reschedulable
+            gcAllocIDs = append(gcAllocIDs, alloc.ID)
+            continue
+        }
         var reschedulePolicy *structs.ReschedulePolicy
         tg := job.LookupTaskGroup(alloc.TaskGroup)

         if tg != nil {
             reschedulePolicy = tg.ReschedulePolicy
         }
-        if !alloc.GCEligible(reschedulePolicy, time.Now(), thresholdIndex) {
+        if !gcEligible(alloc, reschedulePolicy, time.Now(), thresholdIndex) {
             // Can't GC the evaluation since not all of the allocations are
             // terminal
             gcEval = false

@@ -567,3 +574,32 @@ func (c *CoreScheduler) partitionDeploymentReap(deployments []string) []*structs
     return requests
 }
+
+// gcEligible returns if the allocation is eligible to be garbage collected
+// according to its terminal status and its reschedule trackers
+func gcEligible(a *structs.Allocation, reschedulePolicy *structs.ReschedulePolicy, gcTime time.Time, thresholdIndex uint64) bool {
+    // Not in a terminal status and old enough
+    if !a.TerminalStatus() || a.ModifyIndex > thresholdIndex {
+        return false
+    }
+    // No reschedule policy or restarts are disabled
+    if reschedulePolicy == nil || reschedulePolicy.Attempts == 0 || reschedulePolicy.Interval == 0 {
+        return true
+    }
+    // Restart tracking information has been carried forward
+    if a.NextAllocation != "" {
+        return true
+    }
+    // Eligible for restarts but none have been attempted yet
+    if a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0 {
+        return false
+    }
+
+    // Most recent reschedule attempt is within time interval
+    interval := reschedulePolicy.Interval
+    lastIndex := len(a.RescheduleTracker.Events)
+    lastRescheduleEvent := a.RescheduleTracker.Events[lastIndex-1]
+    timeDiff := gcTime.UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
+
+    return timeDiff > interval.Nanoseconds()
+}
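
The last check in gcEligible compares the age of the most recent reschedule event against the policy's interval. A minimal standalone sketch of that interval math, using assumed example values rather than anything taken from this diff:

package main

import (
    "fmt"
    "time"
)

func main() {
    // Assumed example values: a 30 minute reschedule interval and a last
    // reschedule attempt 45 minutes ago, stored as UnixNano the same way
    // RescheduleEvent.RescheduleTime is stored in the function above.
    interval := 30 * time.Minute
    gcTime := time.Now()
    lastRescheduleTime := gcTime.Add(-45 * time.Minute).UTC().UnixNano()

    // Mirrors the final comparison in gcEligible: the alloc is only eligible
    // once the most recent attempt falls outside the interval.
    timeDiff := gcTime.UTC().UnixNano() - lastRescheduleTime
    fmt.Println(timeDiff > interval.Nanoseconds()) // prints true
}
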
@@ -200,6 +200,81 @@ func TestCoreScheduler_EvalGC_ReshedulingAllocs(t *testing.T) {
 }

+// Tests GC behavior on stopped job with reschedulable allocs
+func TestCoreScheduler_EvalGC_StoppedJob_Reschedulable(t *testing.T) {
+    t.Parallel()
+    s1 := testServer(t, nil)
+    defer s1.Shutdown()
+    testutil.WaitForLeader(t, s1.RPC)
+    require := require.New(t)
+
+    // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
+    s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
+
+    // Insert "dead" eval
+    state := s1.fsm.State()
+    eval := mock.Eval()
+    eval.Status = structs.EvalStatusFailed
+    state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
+    err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
+    require.Nil(err)
+
+    // Insert mock stopped job with default reschedule policy of 2 in 10 minutes
+    job := mock.Job()
+    job.ID = eval.JobID
+    job.Stop = true
+
+    err = state.UpsertJob(1001, job)
+    require.Nil(err)
+
+    // Insert failed alloc with a recent reschedule attempt
+    alloc := mock.Alloc()
+    alloc.EvalID = eval.ID
+    alloc.DesiredStatus = structs.AllocDesiredStatusRun
+    alloc.ClientStatus = structs.AllocClientStatusLost
+    alloc.JobID = eval.JobID
+    alloc.TaskGroup = job.TaskGroups[0].Name
+    alloc.RescheduleTracker = &structs.RescheduleTracker{
+        Events: []*structs.RescheduleEvent{
+            {
+                RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
+                PrevNodeID:     uuid.Generate(),
+                PrevAllocID:    uuid.Generate(),
+            },
+        },
+    }
+    err = state.UpsertAllocs(1001, []*structs.Allocation{alloc})
+    require.Nil(err)
+
+    // Update the time tables to make this work
+    tt := s1.fsm.TimeTable()
+    tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
+
+    // Create a core scheduler
+    snap, err := state.Snapshot()
+    if err != nil {
+        t.Fatalf("err: %v", err)
+    }
+    core := NewCoreScheduler(s1, snap)
+
+    // Attempt the GC
+    gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
+    err = core.Process(gc)
+    require.Nil(err)
+
+    // Eval should not exist
+    ws := memdb.NewWatchSet()
+    out, err := state.EvalByID(ws, eval.ID)
+    require.Nil(err)
+    require.Nil(out)
+
+    // Alloc should not exist
+    outA, err := state.AllocByID(ws, alloc.ID)
+    require.Nil(err)
+    require.Nil(outA)
+
+}
+
 // An EvalGC should never reap a batch job that has not been stopped
 func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
     t.Parallel()

@@ -1680,3 +1755,133 @@ func TestCoreScheduler_PartitionDeploymentReap(t *testing.T) {
         t.Fatalf("Unexpected second request: %v", second)
     }
 }
+
+// Tests various scenarios when allocations are eligible to be GCed
+func TestAllocation_GCEligible(t *testing.T) {
+    type testCase struct {
+        Desc               string
+        GCTime             time.Time
+        ClientStatus       string
+        DesiredStatus      string
+        ModifyIndex        uint64
+        NextAllocID        string
+        ReschedulePolicy   *structs.ReschedulePolicy
+        RescheduleTrackers []*structs.RescheduleEvent
+        ThresholdIndex     uint64
+        ShouldGC           bool
+    }
+
+    fail := time.Now()
+
+    harness := []testCase{
+        {
+            Desc:           "GC when non terminal",
+            ClientStatus:   structs.AllocClientStatusPending,
+            DesiredStatus:  structs.AllocDesiredStatusRun,
+            GCTime:         fail,
+            ModifyIndex:    90,
+            ThresholdIndex: 90,
+            ShouldGC:       false,
+        },
+        {
+            Desc:             "GC when threshold not met",
+            ClientStatus:     structs.AllocClientStatusComplete,
+            DesiredStatus:    structs.AllocDesiredStatusStop,
+            GCTime:           fail,
+            ModifyIndex:      100,
+            ThresholdIndex:   90,
+            ReschedulePolicy: nil,
+            ShouldGC:         false,
+        },
+        {
+            Desc:             "GC when no reschedule policy",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            GCTime:           fail,
+            ReschedulePolicy: nil,
+            ModifyIndex:      90,
+            ThresholdIndex:   90,
+            ShouldGC:         true,
+        },
+        {
+            Desc:             "GC when empty policy",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            GCTime:           fail,
+            ReschedulePolicy: &structs.ReschedulePolicy{0, 0 * time.Minute},
+            ModifyIndex:      90,
+            ThresholdIndex:   90,
+            ShouldGC:         true,
+        },
+        {
+            Desc:             "GC with no previous attempts",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            GCTime:           fail,
+            ModifyIndex:      90,
+            ThresholdIndex:   90,
+            ReschedulePolicy: &structs.ReschedulePolicy{1, 1 * time.Minute},
+            ShouldGC:         false,
+        },
+        {
+            Desc:             "GC with prev reschedule attempt within interval",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            ReschedulePolicy: &structs.ReschedulePolicy{2, 30 * time.Minute},
+            GCTime:           fail,
+            ModifyIndex:      90,
+            ThresholdIndex:   90,
+            RescheduleTrackers: []*structs.RescheduleEvent{
+                {
+                    RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
+                },
+            },
+            ShouldGC: false,
+        },
+        {
+            Desc:             "GC with prev reschedule attempt outside interval",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            GCTime:           fail,
+            ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
+            RescheduleTrackers: []*structs.RescheduleEvent{
+                {
+                    RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
+                },
+                {
+                    RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
+                },
+            },
+            ShouldGC: true,
+        },
+        {
+            Desc:             "GC when next alloc id is set",
+            ClientStatus:     structs.AllocClientStatusFailed,
+            DesiredStatus:    structs.AllocDesiredStatusRun,
+            GCTime:           fail,
+            ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
+            RescheduleTrackers: []*structs.RescheduleEvent{
+                {
+                    RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
+                },
+            },
+            NextAllocID: uuid.Generate(),
+            ShouldGC:    true,
+        },
+    }
+
+    for _, tc := range harness {
+        alloc := &structs.Allocation{}
+        alloc.ModifyIndex = tc.ModifyIndex
+        alloc.DesiredStatus = tc.DesiredStatus
+        alloc.ClientStatus = tc.ClientStatus
+        // Carry the next alloc ID onto the allocation so the "next alloc id is
+        // set" case actually exercises the NextAllocation check in gcEligible.
+        alloc.NextAllocation = tc.NextAllocID
+        alloc.RescheduleTracker = &structs.RescheduleTracker{tc.RescheduleTrackers}
+
+        t.Run(tc.Desc, func(t *testing.T) {
+            if got := gcEligible(alloc, tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
+                t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
+            }
+        })
+
+    }
+}
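
A readability note on the table above: the policy literals are positional, for example &structs.ReschedulePolicy{5, 30 * time.Minute}. Because gcEligible only reads the Attempts and Interval fields, a named-field form says the same thing more explicitly. A small sketch of that alternative, offered only as a style suggestion and not part of the commit:

package main

import (
    "fmt"
    "time"

    "github.com/hashicorp/nomad/nomad/structs"
)

func main() {
    // Equivalent to the positional &structs.ReschedulePolicy{5, 30 * time.Minute}
    // used in the test cases, but the field names make the intent explicit.
    policy := &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute}
    fmt.Println(policy.Attempts, policy.Interval)
}
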
@@ -2842,234 +2842,6 @@ func TestRescheduleTracker_Copy(t *testing.T) {
     }
 }
-
-func TestAllocation_GCEligible(t *testing.T) {
-    type testCase struct {
-        Desc               string
-        GCTime             time.Time
-        ClientStatus       string
-        DesiredStatus      string
-        ModifyIndex        uint64
-        ReschedulePolicy   *ReschedulePolicy
-        RescheduleTrackers []*RescheduleEvent
-        ThresholdIndex     uint64
-        ShouldGC           bool
-    }
-
-    fail := time.Now()
-
-    harness := []testCase{
-        {
-            Desc:           "GC when non terminal",
-            ClientStatus:   AllocClientStatusPending,
-            DesiredStatus:  AllocDesiredStatusRun,
-            GCTime:         fail,
-            ModifyIndex:    90,
-            ThresholdIndex: 90,
-            ShouldGC:       false,
-        },
-        {
-            Desc:             "GC when threshold not met",
-            ClientStatus:     AllocClientStatusComplete,
-            DesiredStatus:    AllocDesiredStatusStop,
-            GCTime:           fail,
-            ModifyIndex:      100,
-            ThresholdIndex:   90,
-            ReschedulePolicy: nil,
-            ShouldGC:         false,
-        },
-        {
-            Desc:             "GC when no reschedule policy",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: nil,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ShouldGC:         true,
-        },
-        {
-            Desc:             "GC when empty policy",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: &ReschedulePolicy{0, 0 * time.Minute},
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ShouldGC:         true,
-        },
-        {
-            Desc:             "GC with no previous attempts",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
-            ShouldGC:         false,
-        },
-        {
-            Desc:             "GC with prev reschedule attempt within interval",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            ReschedulePolicy: &ReschedulePolicy{2, 30 * time.Minute},
-            GCTime:           fail,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            RescheduleTrackers: []*RescheduleEvent{
-                {
-                    RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
-                },
-            },
-            ShouldGC: false,
-        },
-        {
-            Desc:             "GC with prev reschedule attempt outside interval",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: &ReschedulePolicy{5, 30 * time.Minute},
-            RescheduleTrackers: []*RescheduleEvent{
-                {
-                    RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
-                },
-                {
-                    RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
-                },
-            },
-            ShouldGC: true,
-        },
-    }
-
-    for _, tc := range harness {
-        alloc := Allocation{}
-        alloc.ModifyIndex = tc.ModifyIndex
-        alloc.DesiredStatus = tc.DesiredStatus
-        alloc.ClientStatus = tc.ClientStatus
-        alloc.RescheduleTracker = &RescheduleTracker{tc.RescheduleTrackers}
-
-        t.Run(tc.Desc, func(t *testing.T) {
-            if got := alloc.GCEligible(tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
-                t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
-            }
-        })
-
-    }
-}
-
-func TestAllocation_GCEligible(t *testing.T) {
-    type testCase struct {
-        Desc               string
-        GCTime             time.Time
-        ClientStatus       string
-        DesiredStatus      string
-        ModifyIndex        uint64
-        ReschedulePolicy   *ReschedulePolicy
-        RescheduleTrackers []*RescheduleEvent
-        ThresholdIndex     uint64
-        ShouldGC           bool
-    }
-
-    fail := time.Now()
-
-    harness := []testCase{
-        {
-            Desc:           "GC when non terminal",
-            ClientStatus:   AllocClientStatusPending,
-            DesiredStatus:  AllocDesiredStatusRun,
-            GCTime:         fail,
-            ModifyIndex:    90,
-            ThresholdIndex: 90,
-            ShouldGC:       false,
-        },
-        {
-            Desc:             "GC when threshold not met",
-            ClientStatus:     AllocClientStatusComplete,
-            DesiredStatus:    AllocDesiredStatusStop,
-            GCTime:           fail,
-            ModifyIndex:      100,
-            ThresholdIndex:   90,
-            ReschedulePolicy: nil,
-            ShouldGC:         false,
-        },
-        {
-            Desc:             "GC when no reschedule policy",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: nil,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ShouldGC:         true,
-        },
-        {
-            Desc:             "GC when empty policy",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: &ReschedulePolicy{0, 0 * time.Minute},
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ShouldGC:         true,
-        },
-        {
-            Desc:             "GC with no previous attempts",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
-            ShouldGC:         false,
-        },
-        {
-            Desc:             "GC with prev reschedule attempt within interval",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            ReschedulePolicy: &ReschedulePolicy{2, 30 * time.Minute},
-            GCTime:           fail,
-            ModifyIndex:      90,
-            ThresholdIndex:   90,
-            RescheduleTrackers: []*RescheduleEvent{
-                {
-                    RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
-                },
-            },
-            ShouldGC: false,
-        },
-        {
-            Desc:             "GC with prev reschedule attempt outside interval",
-            ClientStatus:     AllocClientStatusFailed,
-            DesiredStatus:    AllocDesiredStatusRun,
-            GCTime:           fail,
-            ReschedulePolicy: &ReschedulePolicy{5, 30 * time.Minute},
-            RescheduleTrackers: []*RescheduleEvent{
-                {
-                    RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
-                },
-                {
-                    RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
-                },
-            },
-            ShouldGC: true,
-        },
-    }
-
-    for _, tc := range harness {
-        alloc := Allocation{}
-        alloc.ModifyIndex = tc.ModifyIndex
-        alloc.DesiredStatus = tc.DesiredStatus
-        alloc.ClientStatus = tc.ClientStatus
-        alloc.RescheduleTracker = &RescheduleTracker{tc.RescheduleTrackers}
-
-        t.Run(tc.Desc, func(t *testing.T) {
-            if got := alloc.GCEligible(tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
-                t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
-            }
-        })
-
-    }
-}

 func TestVault_Validate(t *testing.T) {
     v := &Vault{
         Env: true,