Filter out allocs with DesiredState = stop, and unit tests

Preetha Appan 2018-03-29 09:28:52 -05:00
parent 465ac8f90d
commit 5090fefe96
GPG Key ID: 9F7C19990A50EAFC
3 changed files with 166 additions and 37 deletions


@@ -5758,7 +5758,7 @@ func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
failTime := a.LastEventTime()
reschedulePolicy := a.ReschedulePolicy()
if a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
return time.Time{}, false
}
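
The effect of the added guard, sketched as a minimal hypothetical test (not part of the commit; it borrows the mock helpers used by the scheduler tests below): a client-failed alloc that is already marked for stop no longer yields a reschedule time.

func TestAllocation_NextRescheduleTime_DesiredStop(t *testing.T) {
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusFailed
	alloc.DesiredStatus = structs.AllocDesiredStatusStop

	// The new DesiredStatus check short-circuits before the failure time
	// or the reschedule policy are even consulted.
	if _, eligible := alloc.NextRescheduleTime(); eligible {
		t.Fatalf("expected no reschedule time for a desired-stop alloc")
	}
}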


@@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -317,6 +318,8 @@ type resultExpectation struct {
func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
t.Helper()
assert := assert.New(t)
if exp.createDeployment != nil && r.deployment == nil {
t.Fatalf("Expect a created deployment got none")
} else if exp.createDeployment == nil && r.deployment != nil {
@@ -330,39 +333,13 @@ func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
}
}
if !reflect.DeepEqual(r.deploymentUpdates, exp.deploymentUpdates) {
t.Fatalf("Unexpected deploymentUpdates: %v", pretty.Diff(r.deploymentUpdates, exp.deploymentUpdates))
}
if l := len(r.place); l != exp.place {
t.Fatalf("Expected %d placements; got %d", exp.place, l)
}
if l := len(r.destructiveUpdate); l != exp.destructive {
t.Fatalf("Expected %d destructive; got %d", exp.destructive, l)
}
if l := len(r.inplaceUpdate); l != exp.inplace {
t.Fatalf("Expected %d inplaceUpdate; got %d", exp.inplace, l)
}
if l := len(r.attributeUpdates); l != exp.attributeUpdates {
t.Fatalf("Expected %d attribute updates; got %d", exp.attributeUpdates, l)
}
if l := len(r.stop); l != exp.stop {
t.Fatalf("Expected %d stops; got %d", exp.stop, l)
}
if l := len(r.desiredTGUpdates); l != len(exp.desiredTGUpdates) {
t.Fatalf("Expected %d task group desired tg updates annotations; got %d", len(exp.desiredTGUpdates), l)
}
// Check the desired updates happened
for group, desired := range exp.desiredTGUpdates {
act, ok := r.desiredTGUpdates[group]
if !ok {
t.Fatalf("Expected desired updates for group %q", group)
}
if !reflect.DeepEqual(act, desired) {
t.Fatalf("Unexpected annotations for group %q: %v", group, pretty.Diff(act, desired))
}
}
assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
assert.Len(r.place, exp.place, "Expected Placements")
assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
assert.Len(r.stop, exp.stop, "Expected Stops")
assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}
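
A side effect of moving these checks from t.Fatalf to testify's assert: assert records the failure and keeps going, so one bad reconcile result now reports every mismatched count in a single run rather than stopping at the first. A tiny illustration of that behavior (hypothetical, not part of this commit):

func TestAssertContinuesAfterFailure(t *testing.T) {
	assert := assert.New(t)

	// Fails, but only marks the test as failed and keeps executing...
	assert.Len([]string{"a"}, 2, "Expected Placements")
	// ...so this mismatch is reported in the same run.
	assert.EqualValues(3, 4, "Expected Stops")
}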
// Tests the reconciler properly handles placements for a job that has no
@@ -1606,6 +1583,65 @@ func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
}
// Tests service job placement with desired stop and client status complete
func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
// Set the desired count to 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: delayDur,
MaxDelay: 1 * time.Hour,
}
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DesiredStatus = structs.AllocDesiredStatusRun
}
// Mark one as failed but with desired status stop
// Should not trigger rescheduling logic but should trigger a placement
allocs[4].ClientStatus = structs.AllocClientStatusFailed
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil)
r := reconciler.Compute()
// Should create a new placement for the alloc that was marked stopped
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
},
},
})
assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
// Should not have any follow-up evals created
require := require.New(t)
require.Equal(0, len(r.desiredFollowupEvals))
}
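
The closing index assertion relies on Nomad's alloc naming scheme, where the bracketed suffix is the slot within the task group, so the replacement for the stopped alloc reuses index 4. A small standalone check of that format (hypothetical, not part of this commit):

func TestAllocNameCarriesIndex(t *testing.T) {
	// AllocName renders "<job>.<group>[<index>]", which is the format
	// assertNamesHaveIndexes parses the expected indexes out of.
	if name := structs.AllocName("example", "web", 4); name != "example.web[4]" {
		t.Fatalf("unexpected alloc name %q", name)
	}
}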
// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleNow_Service(t *testing.T) {
require := require.New(t)
@@ -3772,3 +3808,98 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
},
})
}
// Test that a failed deployment cancels non-promoted canaries
func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
// Create a job
job := mock.Job()
job.TaskGroups[0].Count = 3
job.TaskGroups[0].Update = &structs.UpdateStrategy{
Canary: 3,
MaxParallel: 2,
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
MinHealthyTime: 10 * time.Second,
HealthyDeadline: 10 * time.Minute,
Stagger: 31 * time.Second,
}
// Create v1 of the job
jobv1 := job.Copy()
jobv1.Version = 1
jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
// Create v2 of the job
jobv2 := job.Copy()
jobv2.Version = 2
jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
// Create an existing failed deployment with a non-promoted task group
d := structs.NewDeployment(jobv2)
state := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 3,
PlacedAllocs: 3,
}
d.TaskGroups[job.TaskGroups[0].Name] = state
// Create the original allocations
var allocs []*structs.Allocation
for i := 0; i < 3; i++ {
new := mock.Alloc()
new.Job = jobv2
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
}
new.ClientStatus = structs.AllocClientStatusRunning
allocs = append(allocs, new)
}
for i := 0; i < 3; i++ {
new := mock.Alloc()
new.Job = jobv1
new.JobID = jobv1.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = uuid.Generate()
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(false),
}
new.DesiredStatus = structs.AllocDesiredStatusStop
new.ClientStatus = structs.AllocClientStatusFailed
allocs = append(allocs, new)
t.Logf("Canary %q", new.ID)
}
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil)
r := reconciler.Compute()
updates := []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusSuccessful,
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
},
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 0,
InPlaceUpdate: 0,
Ignore: 6,
},
},
})
}


@@ -283,11 +283,9 @@ func updateByReschedulable(alloc *structs.Allocation, now time.Time, batch bool)
if !batch {
// For service jobs we filter out allocs with a desired status of stop, plus terminal allocs
// except those with ClientStatusFailed - those are checked for reschedulability
shouldFilter = alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed
shouldFilter = alloc.DesiredStatus == structs.AllocDesiredStatusStop || (alloc.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusFailed)
}
rescheduleTime, eligible := alloc.NextRescheduleTime()
// We consider a time difference of less than 5 seconds to be eligible
// because we collapse allocations that failed within 5 seconds into a single evaluation
if eligible && now.After(rescheduleTime) {
rescheduleNow = true
} else if !shouldFilter {