Use next alloc id being set, move outside structs package and other code review feedback

This commit is contained in:
Preetha Appan 2018-01-30 09:12:14 -06:00
parent 009df8b986
commit 4fd2691323
No known key found for this signature in database
GPG Key ID: 9F7C19990A50EAFC
3 changed files with 242 additions and 229 deletions

View File

@ -288,13 +288,20 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
gcEval := true
var gcAllocIDs []string
for _, alloc := range allocs {
if job == nil || job.Stop {
// Eligible to be GC'd because the job is not around or stopped
// We don't consider jobs with "dead" status here because it may still
// have terminal allocs that are reschedulable
gcAllocIDs = append(gcAllocIDs, alloc.ID)
continue
}
var reschedulePolicy *structs.ReschedulePolicy
tg := job.LookupTaskGroup(alloc.TaskGroup)
if tg != nil {
reschedulePolicy = tg.ReschedulePolicy
}
if !alloc.GCEligible(reschedulePolicy, time.Now(), thresholdIndex) {
if !gcEligible(alloc, reschedulePolicy, time.Now(), thresholdIndex) {
// Can't GC the evaluation since not all of the allocations are
// terminal
gcEval = false
@ -567,3 +574,32 @@ func (c *CoreScheduler) partitionDeploymentReap(deployments []string) []*structs
return requests
}
// gcEligible returns if the allocation is eligible to be garbage collected
// according to its terminal status and its reschedule trackers
func gcEligible(a *structs.Allocation, reschedulePolicy *structs.ReschedulePolicy, gcTime time.Time, thresholdIndex uint64) bool {
// Not in a terminal status and old enough
if !a.TerminalStatus() || a.ModifyIndex > thresholdIndex {
return false
}
// No reschedule policy or restarts are disabled
if reschedulePolicy == nil || reschedulePolicy.Attempts == 0 || reschedulePolicy.Interval == 0 {
return true
}
// Restart tracking information has been carried forward
if a.NextAllocation != "" {
return true
}
// Eligible for restarts but none have been attempted yet
if a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0 {
return false
}
// Most recent reschedule attempt is within time interval
interval := reschedulePolicy.Interval
lastIndex := len(a.RescheduleTracker.Events)
lastRescheduleEvent := a.RescheduleTracker.Events[lastIndex-1]
timeDiff := gcTime.UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
return timeDiff > interval.Nanoseconds()
}

View File

@ -200,6 +200,81 @@ func TestCoreScheduler_EvalGC_ReshedulingAllocs(t *testing.T) {
}
// Tests GC behavior on stopped job with reschedulable allocs
func TestCoreScheduler_EvalGC_StoppedJob_Reschedulable(t *testing.T) {
t.Parallel()
s1 := testServer(t, nil)
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)
require := require.New(t)
// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
// Insert "dead" eval
state := s1.fsm.State()
eval := mock.Eval()
eval.Status = structs.EvalStatusFailed
state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
require.Nil(err)
// Insert mock stopped job with default reschedule policy of 2 in 10 minutes
job := mock.Job()
job.ID = eval.JobID
job.Stop = true
err = state.UpsertJob(1001, job)
require.Nil(err)
// Insert failed alloc with a recent reschedule attempt
alloc := mock.Alloc()
alloc.EvalID = eval.ID
alloc.DesiredStatus = structs.AllocDesiredStatusRun
alloc.ClientStatus = structs.AllocClientStatusLost
alloc.JobID = eval.JobID
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.RescheduleTracker = &structs.RescheduleTracker{
Events: []*structs.RescheduleEvent{
{
RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
PrevNodeID: uuid.Generate(),
PrevAllocID: uuid.Generate(),
},
},
}
err = state.UpsertAllocs(1001, []*structs.Allocation{alloc})
require.Nil(err)
// Update the time tables to make this work
tt := s1.fsm.TimeTable()
tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
// Create a core scheduler
snap, err := state.Snapshot()
if err != nil {
t.Fatalf("err: %v", err)
}
core := NewCoreScheduler(s1, snap)
// Attempt the GC
gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
err = core.Process(gc)
require.Nil(err)
// Eval should not exist
ws := memdb.NewWatchSet()
out, err := state.EvalByID(ws, eval.ID)
require.Nil(err)
require.Nil(out)
// Alloc should not exist
outA, err := state.AllocByID(ws, alloc.ID)
require.Nil(err)
require.Nil(outA)
}
// An EvalGC should never reap a batch job that has not been stopped
func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
t.Parallel()
@ -1680,3 +1755,133 @@ func TestCoreScheduler_PartitionDeploymentReap(t *testing.T) {
t.Fatalf("Unexpected second request: %v", second)
}
}
// Tests various scenarios when allocations are eligible to be GCed
func TestAllocation_GCEligible(t *testing.T) {
type testCase struct {
Desc string
GCTime time.Time
ClientStatus string
DesiredStatus string
ModifyIndex uint64
NextAllocID string
ReschedulePolicy *structs.ReschedulePolicy
RescheduleTrackers []*structs.RescheduleEvent
ThresholdIndex uint64
ShouldGC bool
}
fail := time.Now()
harness := []testCase{
{
Desc: "GC when non terminal",
ClientStatus: structs.AllocClientStatusPending,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when threshold not met",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusStop,
GCTime: fail,
ModifyIndex: 100,
ThresholdIndex: 90,
ReschedulePolicy: nil,
ShouldGC: false,
},
{
Desc: "GC when no reschedule policy",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: nil,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC when empty policy",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{0, 0 * time.Minute},
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC with no previous attempts",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ReschedulePolicy: &structs.ReschedulePolicy{1, 1 * time.Minute},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt within interval",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
ReschedulePolicy: &structs.ReschedulePolicy{2, 30 * time.Minute},
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt outside interval",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: true,
},
{
Desc: "GC when next alloc id is set",
ClientStatus: structs.AllocClientStatusFailed,
DesiredStatus: structs.AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &structs.ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*structs.RescheduleEvent{
{
RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
},
},
NextAllocID: uuid.Generate(),
ShouldGC: true,
},
}
for _, tc := range harness {
alloc := &structs.Allocation{}
alloc.ModifyIndex = tc.ModifyIndex
alloc.DesiredStatus = tc.DesiredStatus
alloc.ClientStatus = tc.ClientStatus
alloc.RescheduleTracker = &structs.RescheduleTracker{tc.RescheduleTrackers}
t.Run(tc.Desc, func(t *testing.T) {
if got := gcEligible(alloc, tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
}
})
}
}

View File

@ -2842,234 +2842,6 @@ func TestRescheduleTracker_Copy(t *testing.T) {
}
}
func TestAllocation_GCEligible(t *testing.T) {
type testCase struct {
Desc string
GCTime time.Time
ClientStatus string
DesiredStatus string
ModifyIndex uint64
ReschedulePolicy *ReschedulePolicy
RescheduleTrackers []*RescheduleEvent
ThresholdIndex uint64
ShouldGC bool
}
fail := time.Now()
harness := []testCase{
{
Desc: "GC when non terminal",
ClientStatus: AllocClientStatusPending,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when threshold not met",
ClientStatus: AllocClientStatusComplete,
DesiredStatus: AllocDesiredStatusStop,
GCTime: fail,
ModifyIndex: 100,
ThresholdIndex: 90,
ReschedulePolicy: nil,
ShouldGC: false,
},
{
Desc: "GC when no reschedule policy",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: nil,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC when empty policy",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &ReschedulePolicy{0, 0 * time.Minute},
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC with no previous attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt within interval",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
ReschedulePolicy: &ReschedulePolicy{2, 30 * time.Minute},
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt outside interval",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: true,
},
}
for _, tc := range harness {
alloc := Allocation{}
alloc.ModifyIndex = tc.ModifyIndex
alloc.DesiredStatus = tc.DesiredStatus
alloc.ClientStatus = tc.ClientStatus
alloc.RescheduleTracker = &RescheduleTracker{tc.RescheduleTrackers}
t.Run(tc.Desc, func(t *testing.T) {
if got := alloc.GCEligible(tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
}
})
}
}
func TestAllocation_GCEligible(t *testing.T) {
type testCase struct {
Desc string
GCTime time.Time
ClientStatus string
DesiredStatus string
ModifyIndex uint64
ReschedulePolicy *ReschedulePolicy
RescheduleTrackers []*RescheduleEvent
ThresholdIndex uint64
ShouldGC bool
}
fail := time.Now()
harness := []testCase{
{
Desc: "GC when non terminal",
ClientStatus: AllocClientStatusPending,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: false,
},
{
Desc: "GC when threshold not met",
ClientStatus: AllocClientStatusComplete,
DesiredStatus: AllocDesiredStatusStop,
GCTime: fail,
ModifyIndex: 100,
ThresholdIndex: 90,
ReschedulePolicy: nil,
ShouldGC: false,
},
{
Desc: "GC when no reschedule policy",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: nil,
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC when empty policy",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &ReschedulePolicy{0, 0 * time.Minute},
ModifyIndex: 90,
ThresholdIndex: 90,
ShouldGC: true,
},
{
Desc: "GC with no previous attempts",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
ReschedulePolicy: &ReschedulePolicy{1, 1 * time.Minute},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt within interval",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
ReschedulePolicy: &ReschedulePolicy{2, 30 * time.Minute},
GCTime: fail,
ModifyIndex: 90,
ThresholdIndex: 90,
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: false,
},
{
Desc: "GC with prev reschedule attempt outside interval",
ClientStatus: AllocClientStatusFailed,
DesiredStatus: AllocDesiredStatusRun,
GCTime: fail,
ReschedulePolicy: &ReschedulePolicy{5, 30 * time.Minute},
RescheduleTrackers: []*RescheduleEvent{
{
RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
},
{
RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
},
},
ShouldGC: true,
},
}
for _, tc := range harness {
alloc := Allocation{}
alloc.ModifyIndex = tc.ModifyIndex
alloc.DesiredStatus = tc.DesiredStatus
alloc.ClientStatus = tc.ClientStatus
alloc.RescheduleTracker = &RescheduleTracker{tc.RescheduleTrackers}
t.Run(tc.Desc, func(t *testing.T) {
if got := alloc.GCEligible(tc.ReschedulePolicy, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
}
})
}
}
func TestVault_Validate(t *testing.T) {
v := &Vault{
Env: true,