open-nomad/scheduler/reconcile_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package scheduler
import (
"fmt"
"reflect"
"regexp"
"strconv"
"testing"
"time"
"github.com/hashicorp/go-set"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
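// canaryUpdate and noCanaryUpdate are the update strategies shared by the
// update/deployment tests in this file: canaryUpdate places two canaries and
// updates two allocations in parallel, while noCanaryUpdate updates four at a
// time without canaries.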
var (
canaryUpdate = &structs.UpdateStrategy{
Canary: 2,
MaxParallel: 2,
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
MinHealthyTime: 10 * time.Second,
HealthyDeadline: 10 * time.Minute,
Stagger: 31 * time.Second,
}
noCanaryUpdate = &structs.UpdateStrategy{
MaxParallel: 4,
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
MinHealthyTime: 10 * time.Second,
HealthyDeadline: 10 * time.Minute,
Stagger: 31 * time.Second,
}
)
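// allocUpdateFnIgnore is an allocUpdateType stub that reports every existing
// allocation as ignorable (ignore=true, destructive=false, no in-place update).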
func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
return true, false, nil
}
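// allocUpdateFnDestructive is an allocUpdateType stub that reports every
// existing allocation as needing a destructive update.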
func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
return false, true, nil
}
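// allocUpdateFnInplace is an allocUpdateType stub that returns an in-place
// updated copy of the existing allocation, keeping its networks while taking
// CPU, memory, and disk from the new task group.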
func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
// Create a shallow copy
newAlloc := existing.CopySkipJob()
newAlloc.AllocatedResources = &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{},
Shared: structs.AllocatedSharedResources{
DiskMB: int64(newTG.EphemeralDisk.SizeMB),
},
}
// Use the new task resources but keep the network from the old
for _, task := range newTG.Tasks {
networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks
newAlloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{
CpuShares: int64(task.Resources.CPU),
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: int64(task.Resources.MemoryMB),
},
Networks: networks,
}
}
return false, false, newAlloc
}
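// allocUpdateFnMock builds an allocUpdateType that dispatches to the function
// registered in handled for the allocation's ID, falling back to unhandled
// when no specific function is registered.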
func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
if fn, ok := handled[existing.ID]; ok {
return fn(existing, newJob, newTG)
}
return unhandled(existing, newJob, newTG)
}
}
var (
// allocationIndexRegex is a regular expression to find the allocation index.
allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)
// allocNameToIndex returns the index of the allocation.
func allocNameToIndex(name string) uint {
matches := allocationIndexRegex.FindStringSubmatch(name)
if len(matches) != 2 {
return 0
}
index, err := strconv.Atoi(matches[1])
if err != nil {
return 0
}
return uint(index)
}
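// assertNamesHaveIndexes asserts that the indexes parsed from the allocation
// names (the 3 in "jobid.group[3]") match the expected indexes exactly,
// counting duplicates.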
func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
t.Helper()
m := make(map[uint]int)
for _, i := range indexes {
m[uint(i)] += 1
}
for _, n := range names {
index := allocNameToIndex(n)
val, contained := m[index]
if !contained {
t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
}
val--
if val < 0 {
t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
}
m[index] = val
}
for k, remainder := range m {
if remainder != 0 {
t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names)
}
}
}
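// assertNoCanariesStopped asserts that none of the deployment's placed
// canaries appear in the set of allocations being stopped.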
func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
t.Helper()
canaryIndex := make(map[string]struct{})
for _, state := range d.TaskGroups {
for _, c := range state.PlacedCanaries {
canaryIndex[c] = struct{}{}
}
}
for _, s := range stop {
if _, ok := canaryIndex[s.alloc.ID]; ok {
t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
}
}
}
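// assertPlaceResultsHavePreviousAllocs asserts that placement names are
// unique, that a placement linked to a previous allocation shares its name,
// and that exactly numPrevious placements carry a previous allocation.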
func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
t.Helper()
names := make(map[string]struct{}, numPrevious)
found := 0
for _, p := range place {
if _, ok := names[p.name]; ok {
t.Fatalf("Name %q already placed", p.name)
}
names[p.name] = struct{}{}
if p.previousAlloc == nil {
continue
}
if act := p.previousAlloc.Name; p.name != act {
t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
}
found++
}
if numPrevious != found {
t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
}
}
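// assertPlacementsAreRescheduled asserts that placement names are unique and
// that exactly numRescheduled placements with a previous allocation are marked
// as reschedules.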
func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
t.Helper()
names := make(map[string]struct{}, numRescheduled)
found := 0
for _, p := range place {
if _, ok := names[p.name]; ok {
t.Fatalf("Name %q already placed", p.name)
}
names[p.name] = struct{}{}
if p.previousAlloc == nil {
continue
}
if p.reschedule {
found++
}
}
if numRescheduled != found {
t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
}
}
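// intRange expands pairs of inclusive bounds into a flat slice of ints, so
// intRange(0, 2, 5, 5) yields []int{0, 1, 2, 5}. It returns nil for an odd
// number of arguments.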
func intRange(pairs ...int) []int {
if len(pairs)%2 != 0 {
return nil
}
var r []int
for i := 0; i < len(pairs); i += 2 {
for j := pairs[i]; j <= pairs[i+1]; j++ {
r = append(r, j)
}
}
return r
}
func placeResultsToNames(place []allocPlaceResult) []string {
names := make([]string, 0, len(place))
for _, p := range place {
names = append(names, p.name)
}
return names
}
func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
names := make([]string, 0, len(destructive))
for _, d := range destructive {
names = append(names, d.placeName)
}
return names
}
func stopResultsToNames(stop []allocStopResult) []string {
names := make([]string, 0, len(stop))
for _, s := range stop {
names = append(names, s.alloc.Name)
}
return names
}
func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
names := make([]string, 0, len(attributeUpdates))
for _, a := range attributeUpdates {
names = append(names, a.Name)
}
return names
}
func allocsToNames(allocs []*structs.Allocation) []string {
names := make([]string, 0, len(allocs))
for _, a := range allocs {
names = append(names, a.Name)
}
return names
}
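// resultExpectation captures the expected shape of a reconcileResults value:
// the deployment to create, the deployment status updates, the size of each
// result bucket, and the per-task-group desired update annotations.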
type resultExpectation struct {
createDeployment *structs.Deployment
deploymentUpdates []*structs.DeploymentStatusUpdate
place int
destructive int
inplace int
attributeUpdates int
disconnectUpdates int
reconnectUpdates int
desiredTGUpdates map[string]*structs.DesiredUpdates
stop int
}
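// assertResults compares a reconcileResults against a resultExpectation,
// checking the created deployment, the deployment updates, the length of each
// result bucket, and the desired task group update annotations.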
func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
t.Helper()
assertion := assert.New(t)
if exp.createDeployment != nil && r.deployment == nil {
t.Errorf("Expect a created deployment got none")
} else if exp.createDeployment == nil && r.deployment != nil {
t.Errorf("Expect no created deployment; got %#v", r.deployment)
} else if exp.createDeployment != nil && r.deployment != nil {
// Clear the deployment ID
r.deployment.ID, exp.createDeployment.ID = "", ""
if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
t.Errorf("Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v",
r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
}
}
assertion.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
assertion.Len(r.place, exp.place, "Expected Placements")
assertion.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
assertion.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
assertion.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
assertion.Len(r.reconnectUpdates, exp.reconnectUpdates, "Expected Reconnect Updates")
assertion.Len(r.disconnectUpdates, exp.disconnectUpdates, "Expected Disconnect Updates")
assertion.Len(r.stop, exp.stop, "Expected Stops")
assertion.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}
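// buildAllocations creates count allocations against the job's first task
// group, each on its own node with the given client and desired statuses and a
// single node score metric.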
func buildAllocations(job *structs.Job, count int, clientStatus, desiredStatus string, nodeScore float64) []*structs.Allocation {
allocs := make([]*structs.Allocation, 0)
for i := 0; i < count; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.ClientStatus = clientStatus
alloc.DesiredStatus = desiredStatus
alloc.Metrics = &structs.AllocMetric{
ScoreMetaData: []*structs.NodeScoreMeta{
{
NodeID: alloc.NodeID,
NormScore: nodeScore,
Scores: map[string]float64{
alloc.NodeID: nodeScore,
},
},
},
}
allocs = append(allocs, alloc)
}
return allocs
}
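// buildDisconnectedNodes returns a tainted-node map in which the nodes of the
// first count allocations are marked as disconnected.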
func buildDisconnectedNodes(allocs []*structs.Allocation, count int) map[string]*structs.Node {
tainted := make(map[string]*structs.Node, count)
for i := 0; i < count; i++ {
n := mock.Node()
n.ID = allocs[i].NodeID
n.Status = structs.NodeStatusDisconnected
tainted[n.ID] = n
}
return tainted
}
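// buildResumableAllocations builds a job whose first task group tolerates
// client disconnects for up to five minutes, along with count allocations for
// it created via buildAllocations.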
func buildResumableAllocations(count int, clientStatus, desiredStatus string, nodeScore float64) (*structs.Job, []*structs.Allocation) {
job := mock.Job()
job.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Minute)
job.TaskGroups[0].Count = count
return job, buildAllocations(job, count, clientStatus, desiredStatus, nodeScore)
}
// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
reconciler := NewAllocReconciler(
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, nil, nil, "", job.Priority, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 10,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 5,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 5,
Ignore: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
ci.Parallel(t)
// Has desired 10
job := mock.Job()
// Create 20 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 20; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
Stop: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
ci.Parallel(t)
// Set desired 0
job := mock.Job()
job.TaskGroups[0].Count = 0
// Create 20 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 20; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 20,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 20,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
ci.Parallel(t)
// Set desired 0
job := mock.Job()
job.TaskGroups[0].Count = 0
// Create 20 existing allocations
var allocs []*structs.Allocation
var expectedStopped []int
for i := 0; i < 20; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
allocs = append(allocs, alloc)
expectedStopped = append(expectedStopped, i%2)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 20,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 20,
},
},
})
assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 10,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
InPlaceUpdate: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}
// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
ci.Parallel(t)
// Set desired 15
job := mock.Job()
job.TaskGroups[0].Count = 15
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 5,
inplace: 10,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 5,
InPlaceUpdate: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
ci.Parallel(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 5,
stop: 5,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 5,
InPlaceUpdate: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}
// TestReconciler_Inplace_Rollback tests that a rollback to a previous version
// generates the expected placements for any already-running allocations of
// that version.
func TestReconciler_Inplace_Rollback(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Count = 4
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
DelayFunction: "exponential",
Interval: time.Second * 30,
Delay: time.Hour * 1,
Attempts: 3,
Unlimited: true,
}
// Create 3 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 3; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// allocs[0] is an allocation from version 0
allocs[0].ClientStatus = structs.AllocClientStatusRunning
// allocs[1] and allocs[2] are failed allocations for version 1 with
// different rescheduling states
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].TaskStates = map[string]*structs.TaskState{
"web": {FinishedAt: time.Now().Add(-10 * time.Minute)}}
allocs[2].ClientStatus = structs.AllocClientStatusFailed
// job is rolled back, we expect allocs[0] to be updated in-place
allocUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{
allocs[0].ID: allocUpdateFnInplace,
}, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFn,
false, job.ID, job, nil, allocs, nil, uuid.Generate(), 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 1,
stop: 1,
destructive: 1,
attributeUpdates: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Stop: 1,
InPlaceUpdate: 1,
DestructiveUpdate: 1,
},
},
})
assert.Len(t, r.desiredFollowupEvals, 1, "expected 1 follow-up eval")
assertNamesHaveIndexes(t, intRange(0, 0), allocsToNames(r.inplaceUpdate))
assertNamesHaveIndexes(t, intRange(2, 2), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(2, 3), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
destructive: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler properly handles destructive upgrading allocations when max_parallel=0
func TestReconciler_DestructiveMaxParallel(t *testing.T) {
ci.Parallel(t)
job := mock.MaxParallelJob()
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
destructive: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
// Set desired 15
job := mock.Job()
job.TaskGroups[0].Count = 15
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 5,
destructive: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 5,
DestructiveUpdate: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
destructive: 5,
stop: 5,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 5,
DestructiveUpdate: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
job := mock.Job()
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.Node()
n.ID = allocs[i].NodeID
n.Status = structs.NodeStatusDown
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Stop: 2,
Ignore: 8,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
// Set desired 15
job := mock.Job()
job.TaskGroups[0].Count = 15
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.Node()
n.ID = allocs[i].NodeID
n.Status = structs.NodeStatusDown
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 7,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 7,
Stop: 2,
Ignore: 8,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.Node()
n.ID = allocs[i].NodeID
n.Status = structs.NodeStatusDown
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 5,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 5,
Ignore: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
job := mock.Job()
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.DrainNode()
n.ID = allocs[i].NodeID
allocs[i].DesiredTransition.Migrate = pointer.Of(true)
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Migrate: 2,
Ignore: 8,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
ci.Parallel(t)
// Set desired 15
job := mock.Job()
job.TaskGroups[0].Count = 15
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.DrainNode()
n.ID = allocs[i].NodeID
allocs[i].DesiredTransition.Migrate = pointer.Of(true)
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 7,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 5,
Migrate: 2,
Ignore: 8,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
// Set desired 8
job := mock.Job()
job.TaskGroups[0].Count = 8
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 3)
for i := 0; i < 3; i++ {
n := mock.DrainNode()
n.ID = allocs[i].NodeID
allocs[i].DesiredTransition.Migrate = pointer.Of(true)
tainted[n.ID] = n
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 3,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Migrate: 1,
Stop: 2,
Ignore: 7,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
// These should not have the reschedule field set
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
ci.Parallel(t)
2017-06-01 22:16:24 +00:00
job := mock.Job()
// Create 10 allocations for a tg that no longer exists
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
oldName := job.TaskGroups[0].Name
newName := "different"
job.TaskGroups[0].Name = newName
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 10,
inplace: 0,
stop: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
oldName: {
Stop: 10,
},
newName: {
Place: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles a job in stopped states
func TestReconciler_JobStopped(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.Stop = true
cases := []struct {
name string
job *structs.Job
jobID, taskGroup string
}{
{
name: "stopped job",
job: job,
jobID: job.ID,
taskGroup: job.TaskGroups[0].Name,
},
{
name: "nil job",
job: nil,
jobID: "foo",
taskGroup: "bar",
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = c.job
alloc.JobID = c.jobID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
alloc.TaskGroup = c.taskGroup
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
c.taskGroup: {
Stop: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
})
}
}
// Tests the reconciler doesn't update allocs in terminal state
// when job is stopped or nil
func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.Stop = true
cases := []struct {
name string
job *structs.Job
jobID, taskGroup string
}{
{
name: "stopped job",
job: job,
jobID: job.ID,
taskGroup: job.TaskGroups[0].Name,
},
{
name: "nil job",
job: nil,
jobID: "foo",
taskGroup: "bar",
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create 10 terminal allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = c.job
alloc.JobID = c.jobID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
alloc.TaskGroup = c.taskGroup
if i%2 == 0 {
alloc.DesiredStatus = structs.AllocDesiredStatusStop
} else {
alloc.ClientStatus = structs.AllocClientStatusFailed
}
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
require.Len(t, r.stop, 0)
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
c.taskGroup: {},
},
})
})
}
}
// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
tg2 := job.TaskGroups[0].Copy()
tg2.Name = "foo"
job.TaskGroups = append(job.TaskGroups, tg2)
// Create 2 existing allocations for the first tg
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 18,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 8,
Ignore: 2,
},
tg2.Name: {
Place: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}
// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update block and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateBlock(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
tg2 := job.TaskGroups[0].Copy()
tg2.Name = "foo"
job.TaskGroups = append(job.TaskGroups, tg2)
job.TaskGroups[0].Update = noCanaryUpdate
// Create all the allocs
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
for j := 0; j < 10; j++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
alloc.TaskGroup = job.TaskGroups[i].Name
allocs = append(allocs, alloc)
}
}
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
tg2.Name: {
Ignore: 10,
},
},
})
}
// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 4
job := mock.Job()
job.TaskGroups[0].Count = 4
now := time.Now()
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
tgName := job.TaskGroups[0].Name
// Create 6 existing allocations - 2 running, 1 complete and 3 failed
var allocs []*structs.Allocation
for i := 0; i < 6; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark 3 as failed with restart tracking info
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[0].NextAllocation = allocs[1].ID
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].NextAllocation = allocs[2].ID
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now}}
allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[1].ID,
PrevNodeID: uuid.Generate(),
},
}}
// Mark one as complete
allocs[5].ClientStatus = structs.AllocClientStatusComplete
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job,
nil, allocs, nil, uuid.Generate(), 50, true)
r := reconciler.Compute()
// Two reschedule attempts were already made, one more can be made at a future time
// Verify that the follow up eval has the expected waitUntil time
evals := r.desiredFollowupEvals[tgName]
require.NotNil(evals)
require.Equal(1, len(evals))
require.Equal(now.Add(delayDur), evals[0].WaitUntil)
// Alloc 5 should not be replaced because it is terminal
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
attributeUpdates: 1,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 0,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
},
},
})
assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))
// Verify that the followup evalID field is set correctly
var annotated *structs.Allocation
for _, a := range r.attributeUpdates {
annotated = a
}
require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 10
job := mock.Job()
job.TaskGroups[0].Count = 10
now := time.Now()
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
tgName := job.TaskGroups[0].Name
// Create 10 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark 5 as failed with fail times very close together
for i := 0; i < 5; i++ {
allocs[i].ClientStatus = structs.AllocClientStatusFailed
allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
}
// Mark two more as failed several seconds later
for i := 5; i < 7; i++ {
allocs[i].ClientStatus = structs.AllocClientStatusFailed
allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(10 * time.Second)}}
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job,
nil, allocs, nil, uuid.Generate(), 50, true)
r := reconciler.Compute()
// Verify that two follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.NotNil(evals)
require.Equal(2, len(evals))
// Verify expected WaitUntil values for both batched evals
require.Equal(now.Add(delayDur), evals[0].WaitUntil)
secondBatchDuration := delayDur + 10*time.Second
require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)
// Alloc 5 should not be replaced because it is terminal
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
attributeUpdates: 7,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 0,
InPlaceUpdate: 0,
Ignore: 10,
Stop: 0,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))
// Verify that the followup evalID field is set correctly
for _, alloc := range r.attributeUpdates {
if allocNameToIndex(alloc.Name) < 5 {
require.Equal(evals[0].ID, alloc.FollowupEvalID)
} else if allocNameToIndex(alloc.Name) < 7 {
require.Equal(evals[1].ID, alloc.FollowupEvalID)
} else {
t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
}
}
}
// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 4
job := mock.Job()
job.TaskGroups[0].Count = 4
now := time.Now()
// Set up reschedule policy
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
tgName := job.TaskGroups[0].Name
// Create 6 existing allocations - 2 running, 1 complete and 3 failed
var allocs []*structs.Allocation
for i := 0; i < 6; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark 3 as failed with restart tracking info
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[0].NextAllocation = allocs[1].ID
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].NextAllocation = allocs[2].ID
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-5 * time.Second)}}
allocs[2].FollowupEvalID = uuid.Generate()
allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[1].ID,
PrevNodeID: uuid.Generate(),
},
}}
// Mark one as complete
allocs[5].ClientStatus = structs.AllocClientStatusComplete
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job,
nil, allocs, nil, "", 50, true)
reconciler.now = now
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Two reschedule attempts were made, one more can be made now
// Alloc 5 should not be replaced because it is terminal
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
stop: 1,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Stop: 1,
Ignore: 3,
},
},
})
assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleLater_Service(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark two as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
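// allocs[0] has already used its single reschedule attempt within the interval,
// so only allocs[1] should be rescheduled, and only after the follow-up eval fires.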
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
// Mark one as desired state stop
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, uuid.Generate(), 50, true)
r := reconciler.Compute()
// Should place a new placement and create a follow up eval for the delayed reschedule
// Verify that the follow up eval has the expected waitUntil time
evals := r.desiredFollowupEvals[tgName]
require.NotNil(evals)
require.Equal(1, len(evals))
require.Equal(now.Add(delayDur), evals[0].WaitUntil)
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
attributeUpdates: 1,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
Stop: 0,
},
},
})
assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))
// Verify that the followup evalID field is set correctly
var annotated *structs.Allocation
for _, a := range r.attributeUpdates {
annotated = a
}
require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
// Tests service allocations with client status complete
func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
ci.Parallel(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: delayDur,
MaxDelay: 1 * time.Hour,
}
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DesiredStatus = structs.AllocDesiredStatusRun
}
// Mark one as client status complete
allocs[4].ClientStatus = structs.AllocClientStatusComplete
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Should place a new placement for the alloc that was marked complete
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
},
},
})
assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
}
// Tests service job placement with desired stop and client status complete
func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
ci.Parallel(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Set up reschedule policy
delayDur := 15 * time.Second
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: delayDur,
MaxDelay: 1 * time.Hour,
}
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DesiredStatus = structs.AllocDesiredStatusRun
}
// Mark one as failed but with desired status stop
// Should not trigger rescheduling logic but should trigger a placement
allocs[4].ClientStatus = structs.AllocClientStatusFailed
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Should place a new placement for the alloc that was marked stopped
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
InPlaceUpdate: 0,
Ignore: 4,
},
},
})
assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
// Should not have any follow up evals created
require := require.New(t)
require.Equal(0, len(r.desiredFollowupEvals))
}
// Tests that failed service allocations are rescheduled immediately once their reschedule delay has elapsed
func TestReconciler_RescheduleNow_Service(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = noCanaryUpdate
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark two as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
// Mark one as desired state stop
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Ignore: 3,
Stop: 1,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests rescheduling failed service allocations when there's clock drift (up to a second)
func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = noCanaryUpdate
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark one as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
// Set fail time to 4 seconds ago which falls within the reschedule window
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-4 * time.Second)}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
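// Pin the reconciler's clock; the failure is only 4s old against a 5s delay,
// but it falls within the allowed clock-drift window and is rescheduled immediately.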
reconciler.now = now
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that one rescheduled alloc was placed
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Stop: 1,
Ignore: 4,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = noCanaryUpdate
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark one as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
// Set fail time to 5 seconds ago and eval ID
evalID := uuid.Generate()
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-5 * time.Second)}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].FollowupEvalID = evalID
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, evalID, 50, true)
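// Run the reconciler with a clock 30s behind the failure time; because the
// alloc's FollowupEvalID matches the triggering eval, it is rescheduled
// immediately despite the apparent clock drift.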
reconciler.now = now.Add(-30 * time.Second)
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that one rescheduled alloc was placed
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
stop: 1,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Stop: 1,
Ignore: 4,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests rescheduling failed service allocations when there are canaries
func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = canaryUpdate
job2 := job.Copy()
job2.Version++
d := structs.NewDeployment(job2, 50)
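// The new job version's deployment is still waiting on canary promotion.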
d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
s := &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 5,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark three as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
// Mark a third alloc as failed
allocs[4].ClientStatus = structs.AllocClientStatusFailed
// Create 2 canary allocations
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Healthy: pointer.Of(false),
}
s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that the two eligible failed allocs were rescheduled
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
stop: 2,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Stop: 2,
Ignore: 5,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
assertPlacementsAreRescheduled(t, 2, r.place)
}
// Tests rescheduling failed canary service allocations
func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Delay: 5 * time.Second,
DelayFunction: "constant",
MaxDelay: 1 * time.Hour,
Unlimited: true,
}
job.TaskGroups[0].Update = canaryUpdate
job2 := job.Copy()
job2.Version++
d := structs.NewDeployment(job2, 50)
d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
s := &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 5,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Create 2 healthy canary allocations
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Healthy: pointer.Of(false),
}
s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
allocs = append(allocs, alloc)
}
// Mark the canaries as failed
allocs[5].ClientStatus = structs.AllocClientStatusFailed
allocs[5].DesiredTransition.Reschedule = pointer.Of(true)
// Mark one of them as already rescheduled once
allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[6].ClientStatus = structs.AllocClientStatusFailed
allocs[6].DesiredTransition.Reschedule = pointer.Of(true)
// Create 4 unhealthy canary allocations that have already been replaced
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Healthy: pointer.Of(false),
}
s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2,
d, allocs, nil, "", 50, true)
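// Pin the reconciler's clock; both failed canaries are marked for rescheduling
// via DesiredTransition.Reschedule and should be replaced immediately.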
reconciler.now = now
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that both failed canaries were rescheduled
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
stop: 2,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Stop: 2,
Ignore: 9,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
assertPlacementsAreRescheduled(t, 2, r.place)
}
// Tests rescheduling failed canary service allocations when one has reached its
// reschedule limit
func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = canaryUpdate
job2 := job.Copy()
job2.Version++
d := structs.NewDeployment(job2, 50)
d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
s := &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 5,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Create 2 healthy canary allocations
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Healthy: pointer.Of(false),
}
s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
allocs = append(allocs, alloc)
}
// Mark the canaries as failed
allocs[5].ClientStatus = structs.AllocClientStatusFailed
allocs[5].DesiredTransition.Reschedule = pointer.Of(true)
// Mark one of them as already rescheduled once
allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[6].ClientStatus = structs.AllocClientStatusFailed
allocs[6].DesiredTransition.Reschedule = pointer.Of(true)
// Create 4 unhealthy canary allocations that have already been replaced
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Healthy: pointer.Of(false),
}
s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2,
d, allocs, nil, "", 50, true)
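// Pin the reconciler's clock. allocs[5] has already used its single reschedule
// attempt, so only allocs[6] should be rescheduled now.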
reconciler.now = now
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that only one rescheduled alloc was placed; the other failed canary
// has reached its reschedule limit
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
stop: 1,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Stop: 1,
Ignore: 10,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests that failed service allocations that were already rescheduled are not rescheduled again
func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
ci.Parallel(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
// Set up reschedule policy
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
// Create 7 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 7; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark two as failed and rescheduled
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[0].ID = allocs[1].ID
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].NextAllocation = allocs[2].ID
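// allocs[1] already points at its replacement via NextAllocation, and allocs[0]
// reuses allocs[1]'s ID, so neither failed alloc should be rescheduled again.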
// Mark one as desired state stop
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Should place 1 new alloc to make up the desired count of 5; the previously
// rescheduled failed allocs are not rescheduled again
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Ignore: 4,
},
},
})
// Name index 0 is used for the replacement because its original alloc
// (allocs[0]) reuses allocs[1]'s ID and is no longer tracked under that name
assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
}
// Tests the reconciler cancels an old deployment when the job is being stopped
func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.Stop = true
running := structs.NewDeployment(job, 50)
failed := structs.NewDeployment(job, 50)
failed.Status = structs.DeploymentStatusFailed
cases := []struct {
name string
job *structs.Job
jobID, taskGroup string
deployment *structs.Deployment
cancel bool
}{
{
name: "stopped job, running deployment",
job: job,
jobID: job.ID,
taskGroup: job.TaskGroups[0].Name,
deployment: running,
cancel: true,
},
{
name: "nil job, running deployment",
job: nil,
jobID: "foo",
taskGroup: "bar",
deployment: running,
cancel: true,
},
{
name: "stopped job, failed deployment",
job: job,
jobID: job.ID,
taskGroup: job.TaskGroups[0].Name,
deployment: failed,
cancel: false,
},
{
name: "nil job, failed deployment",
job: nil,
jobID: "foo",
taskGroup: "bar",
deployment: failed,
cancel: false,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = c.job
alloc.JobID = c.jobID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
alloc.TaskGroup = c.taskGroup
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job,
c.deployment, allocs, nil, "", 50, true)
r := reconciler.Compute()
var updates []*structs.DeploymentStatusUpdate
if c.cancel {
updates = []*structs.DeploymentStatusUpdate{
{
DeploymentID: c.deployment.ID,
Status: structs.DeploymentStatusCancelled,
StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
},
}
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 10,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
c.taskGroup: {
Stop: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
})
}
}
// Tests the reconciler cancels an old deployment when the job is updated
func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
ci.Parallel(t)
// Create a base job
job := mock.Job()
// Create two deployments
running := structs.NewDeployment(job, 50)
failed := structs.NewDeployment(job, 50)
failed.Status = structs.DeploymentStatusFailed
// Make the job newer than the deployment
job.Version += 10
cases := []struct {
name string
deployment *structs.Deployment
cancel bool
}{
{
name: "running deployment",
deployment: running,
cancel: true,
},
{
name: "failed deployment",
deployment: failed,
cancel: false,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
c.deployment, allocs, nil, "", 50, true)
r := reconciler.Compute()
var updates []*structs.DeploymentStatusUpdate
if c.cancel {
updates = []*structs.DeploymentStatusUpdate{
{
DeploymentID: c.deployment.ID,
Status: structs.DeploymentStatusCancelled,
StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
},
}
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
})
}
}
// Tests the reconciler creates a deployment and does a rolling upgrade with
// destructive changes
func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: d,
deploymentUpdates: nil,
destructive: 4,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: 4,
Ignore: 6,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler creates a deployment for inplace updates
func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
ci.Parallel(t)
jobOld := mock.Job()
job := jobOld.Copy()
job.Version++
job.TaskGroups[0].Update = noCanaryUpdate
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = jobOld
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: d,
deploymentUpdates: nil,
place: 0,
inplace: 10,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
InPlaceUpdate: 10,
},
},
})
}
// Tests the reconciler creates a deployment when the job has a newer create index
func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
ci.Parallel(t)
jobOld := mock.Job()
job := jobOld.Copy()
job.TaskGroups[0].Update = noCanaryUpdate
job.CreateIndex += 100
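// The bumped CreateIndex marks this copy as a newer instance of the job, so the
// old allocs below are expected to be ignored and five fresh placements made
// under a new deployment.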
// Create 5 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = jobOld
alloc.JobID = jobOld.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 5,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: d,
deploymentUpdates: nil,
place: 5,
destructive: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
InPlaceUpdate: 0,
Ignore: 5,
Place: 5,
DestructiveUpdate: 0,
},
},
})
}
// Tests the reconciler doesn't create a deployment if there are no changes
func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create 10 allocations from the job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: 0,
Ignore: 10,
},
},
})
}
// Tests the reconciler doesn't place any more canaries when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
cases := []struct {
name string
deploymentStatus string
stop uint64
}{
{
name: "paused deployment",
deploymentStatus: structs.DeploymentStatusPaused,
stop: 0,
},
{
name: "failed deployment",
deploymentStatus: structs.DeploymentStatusFailed,
stop: 1,
},
}
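// Note: a paused deployment keeps its already-placed canary (stop 0), while a
// failed one stops it (stop 1).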
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a deployment that is paused/failed and has placed some canaries
d := structs.NewDeployment(job, 50)
d.Status = c.deploymentStatus
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredCanaries: 2,
DesiredTotal: 10,
PlacedAllocs: 1,
}
// Create 10 allocations for the original job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
canary.TaskGroup = job.TaskGroups[0].Name
canary.DeploymentID = d.ID
allocs = append(allocs, canary)
d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: int(c.stop),
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 11 - c.stop,
Stop: c.stop,
},
},
})
})
}
}
// Tests the reconciler doesn't place any more allocs when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
job.TaskGroups[0].Count = 15
cases := []struct {
name string
deploymentStatus string
}{
{
name: "paused deployment",
deploymentStatus: structs.DeploymentStatusPaused,
},
{
name: "failed deployment",
deploymentStatus: structs.DeploymentStatusFailed,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a deployment that is paused/failed and has placed some allocs
d := structs.NewDeployment(job, 50)
d.Status = c.deploymentStatus
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 15,
PlacedAllocs: 10,
}
// Create 10 allocations for the new job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
})
}
}
// Tests the reconciler doesn't do any more destructive updates when the
// deployment is paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
cases := []struct {
name string
deploymentStatus string
}{
{
name: "paused deployment",
deploymentStatus: structs.DeploymentStatusPaused,
},
{
name: "failed deployment",
deploymentStatus: structs.DeploymentStatusFailed,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a deployment that is paused/failed and has placed one alloc from the new job
d := structs.NewDeployment(job, 50)
d.Status = c.deploymentStatus
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
PlacedAllocs: 1,
}
// Create 9 allocations for the original job
var allocs []*structs.Allocation
for i := 1; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create one for the new job
newAlloc := mock.Alloc()
newAlloc.Job = job
newAlloc.JobID = job.ID
newAlloc.NodeID = uuid.Generate()
newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
newAlloc.TaskGroup = job.TaskGroups[0].Name
newAlloc.DeploymentID = d.ID
allocs = append(allocs, newAlloc)
mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
})
}
}
// Tests the reconciler handles migrating a canary correctly on a draining node
func TestReconciler_DrainNode_Canary(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
// Create a running deployment that has placed two canaries
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create two canaries for the new job
handled := make(map[string]allocUpdateType)
for i := 0; i < 2; i++ {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
canary.DeploymentID = d.ID
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
allocs = append(allocs, canary)
handled[canary.ID] = allocUpdateFnIgnore
}
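// The two canaries occupy allocs[10] and allocs[11]; the draining node below
// holds the second canary.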
// Build a map of tainted nodes that contains the last canary
tainted := make(map[string]*structs.Node, 1)
n := mock.DrainNode()
n.ID = allocs[11].NodeID
allocs[11].DesiredTransition.Migrate = pointer.Of(true)
tainted[n.ID] = n
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 1,
Ignore: 11,
},
},
})
assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
}
// Tests the reconciler handles migrating a canary correctly on a lost node
func TestReconciler_LostNode_Canary(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
// Create a running deployment that has placed two canaries
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create two canaries for the new job
handled := make(map[string]allocUpdateType)
for i := 0; i < 2; i++ {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
canary.DeploymentID = d.ID
allocs = append(allocs, canary)
handled[canary.ID] = allocUpdateFnIgnore
}
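// The two canaries occupy allocs[10] and allocs[11]; the down node below holds
// the second canary.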
// Build a map of tainted nodes that contains the last canary
tainted := make(map[string]*structs.Node, 1)
n := mock.Node()
n.ID = allocs[11].NodeID
n.Status = structs.NodeStatusDown
tainted[n.ID] = n
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 1,
Ignore: 11,
},
},
})
assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
}
// Tests the reconciler handles stopping canaries from older deployments
func TestReconciler_StopOldCanaries(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
// Create an old deployment that has placed some canaries
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Update the job
job.Version += 10
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create canaries
for i := 0; i < 2; i++ {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
canary.DeploymentID = d.ID
allocs = append(allocs, canary)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d,
allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusCancelled,
StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
},
},
place: 2,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Stop: 2,
Ignore: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}
// Tests the reconciler creates new canaries when the job changes
func TestReconciler_NewCanaries(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Ignore: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}
// Tests the reconciler creates new canaries when the job changes and the
// canary count is greater than the task group count
func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Count = 3
job.TaskGroups[0].Update = canaryUpdate.Copy()
job.TaskGroups[0].Update.Canary = 7
// Create 3 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 3; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
state := &structs.DeploymentState{
DesiredCanaries: 7,
DesiredTotal: 3,
}
newD.TaskGroups[job.TaskGroups[0].Name] = state
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: nil,
place: 7,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 7,
Ignore: 3,
},
},
})
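// All seven canaries should be placed, with names covering indexes 0-2 and 3-6.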
assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
}
// Tests the reconciler creates new canaries when the job changes for multiple
// task groups
func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
job.TaskGroups[0].Name = "tg2"
// Create 10 allocations from the old job for each tg
var allocs []*structs.Allocation
for j := 0; j < 2; j++ {
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[j].Name
allocs = append(allocs, alloc)
}
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
state := &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 10,
}
newD.TaskGroups[job.TaskGroups[0].Name] = state
newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: nil,
place: 4,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Ignore: 10,
},
job.TaskGroups[1].Name: {
Canary: 2,
Ignore: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
}
// Tests the reconciler creates new canaries when the job changes and scales up
func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
ci.Parallel(t)
// Scale the job up to 15
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
job.TaskGroups[0].Count = 15
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 15,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Ignore: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}
// Tests the reconciler creates new canaries when the job changes and scales
// down
func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
ci.Parallel(t)
// Scale the job down to 5
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
job.TaskGroups[0].Count = 5
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
newD := structs.NewDeployment(job, 50)
newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredCanaries: 2,
DesiredTotal: 5,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: newD,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 5,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Stop: 5,
Ignore: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}
// Tests the reconciler handles filling the names of partially placed canaries
func TestReconciler_NewCanaries_FillNames(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = &structs.UpdateStrategy{
Canary: 4,
MaxParallel: 2,
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
MinHealthyTime: 10 * time.Second,
HealthyDeadline: 10 * time.Minute,
}
// Create an existing deployment that has placed some canaries
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
DesiredCanaries: 4,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create canaries but pick names at the ends
for i := 0; i < 4; i += 3 {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
canary.DeploymentID = d.ID
allocs = append(allocs, canary)
}
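// Canaries already exist at indexes 0 and 3, so the reconciler is expected to
// fill the gap by placing canaries 1 and 2.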
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Canary: 2,
Ignore: 12,
},
},
})
assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
}
// Tests the reconciler handles canary promotion by unblocking max_parallel
func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
// Create an existing deployment that has placed some canaries and mark them
// promoted
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the canaries
handled := make(map[string]allocUpdateType)
for i := 0; i < 2; i++ {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
canary.DeploymentID = d.ID
canary.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, canary)
handled[canary.ID] = allocUpdateFnIgnore
}
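// With the deployment promoted and both canaries healthy, two destructive
// updates (max_parallel) should proceed while the two replaced old allocs stop.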
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
destructive: 2,
stop: 2,
2017-06-02 23:11:29 +00:00
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 2,
DestructiveUpdate: 2,
Ignore: 8,
},
},
})
assertNoCanariesStopped(t, d, r.stop)
assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}
// Tests the reconciler handles canary promotion when the canary count equals
// the total correctly
func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
job.TaskGroups[0].Count = 2
// Create an existing deployment that has placed some canaries and mark them
// promoted
d := structs.NewDeployment(job, 50)
s := &structs.DeploymentState{
Promoted: true,
DesiredTotal: 2,
DesiredCanaries: 2,
PlacedAllocs: 2,
HealthyAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s
// Create 2 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 2; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
2017-06-02 23:11:29 +00:00
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the canaries
handled := make(map[string]allocUpdateType)
for i := 0; i < 2; i++ {
// Create one canary
canary := mock.Alloc()
canary.Job = job
canary.JobID = job.ID
canary.NodeID = uuid.Generate()
canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
canary.TaskGroup = job.TaskGroups[0].Name
s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
canary.DeploymentID = d.ID
canary.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, canary)
handled[canary.ID] = allocUpdateFnIgnore
}
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
updates := []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusSuccessful,
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
},
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 2,
Ignore: 2,
},
},
})
assertNoCanariesStopped(t, d, r.stop)
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}
// Tests the reconciler checks the health of placed allocs to determine the
// limit
func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
ci.Parallel(t)
2017-06-02 23:11:29 +00:00
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
cases := []struct {
healthy int
}{
{
healthy: 0,
},
{
healthy: 1,
},
{
healthy: 2,
},
{
healthy: 3,
},
{
healthy: 4,
},
}
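// Each case expects the number of new destructive updates to equal the number
// of placed allocs reported healthy, since only healthy placements count
// toward the rolling-update limit.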
for _, c := range cases {
t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
// Create an existing deployment that has placed some canaries and mark them
// promoted
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
PlacedAllocs: 4,
}
// Create 6 allocations from the old job
var allocs []*structs.Allocation
for i := 4; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the new allocs
handled := make(map[string]allocUpdateType)
for i := 0; i < 4; i++ {
new := mock.Alloc()
new.Job = job
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
if i < c.healthy {
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
}
allocs = append(allocs, new)
handled[new.ID] = allocUpdateFnIgnore
}
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
destructive: c.healthy,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: uint64(c.healthy),
Ignore: uint64(10 - c.healthy),
},
},
})
if c.healthy != 0 {
assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
}
})
}
}
// Tests the reconciler handles an alloc on a tainted node during a rolling
// update
func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
ci.Parallel(t)
2017-06-02 23:11:29 +00:00
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create an existing deployment that has some placed allocs
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
PlacedAllocs: 7,
}
// Create 2 allocations from the old job
var allocs []*structs.Allocation
for i := 8; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the healthy replacements
handled := make(map[string]allocUpdateType)
for i := 0; i < 8; i++ {
new := mock.Alloc()
new.Job = job
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, new)
handled[new.ID] = allocUpdateFnIgnore
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 3)
for i := 0; i < 3; i++ {
n := mock.Node()
n.ID = allocs[2+i].NodeID
if i == 0 {
n.Status = structs.NodeStatusDown
} else {
n.DrainStrategy = mock.DrainNode().DrainStrategy
allocs[2+i].DesiredTransition.Migrate = pointer.Of(true)
}
tainted[n.ID] = n
}
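// tainted holds one down node (its alloc is lost and replaced) and two draining
// nodes (their allocs migrate); all three carry new-job allocs.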
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 3,
destructive: 2,
stop: 3,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1, // Place the lost
Stop: 1, // Stop the lost
Migrate: 2, // Migrate the tainted
DestructiveUpdate: 2,
Ignore: 5,
},
},
})
assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
}
// Tests the reconciler handles a failed deployment with allocs on tainted
// nodes
func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) {
ci.Parallel(t)
2017-06-02 23:11:29 +00:00
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create an existing failed deployment that has some placed allocs
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusFailed
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
PlacedAllocs: 4,
}
// Create 6 allocations from the old job
var allocs []*structs.Allocation
for i := 4; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the healthy replacements
handled := make(map[string]allocUpdateType)
for i := 0; i < 4; i++ {
new := mock.Alloc()
new.Job = job
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, new)
handled[new.ID] = allocUpdateFnIgnore
}
// Build a map of tainted nodes
tainted := make(map[string]*structs.Node, 2)
for i := 0; i < 2; i++ {
n := mock.Node()
n.ID = allocs[6+i].NodeID
if i == 0 {
n.Status = structs.NodeStatusDown
} else {
n.DrainStrategy = mock.DrainNode().DrainStrategy
allocs[6+i].DesiredTransition.Migrate = pointer.Of(true)
}
tainted[n.ID] = n
}
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, tainted, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Migrate: 1,
Stop: 1,
Ignore: 8,
2017-06-02 23:11:29 +00:00
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}
// Tests the reconciler handles a run after a deployment is complete
// successfully.
func TestReconciler_CompleteDeployment(t *testing.T) {
ci.Parallel(t)
2017-06-02 23:11:29 +00:00
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusSuccessful
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 10,
HealthyAllocs: 10,
}
// Create allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
}
// Tests that the reconciler marks a deployment as complete once there is
// nothing left to place even if there are failed allocations that are part of
// the deployment.
func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
PlacedAllocs: 20,
HealthyAllocs: 10,
}
// Create 10 healthy allocs and 10 allocs that are failed
var allocs []*structs.Allocation
for i := 0; i < 20; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
if i < 10 {
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.DeploymentStatus.Healthy = pointer.Of(true)
} else {
alloc.DesiredStatus = structs.AllocDesiredStatusStop
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.DeploymentStatus.Healthy = pointer.Of(false)
}
allocs = append(allocs, alloc)
}
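// The ten healthy allocs satisfy DesiredTotal, so the deployment should be
// marked successful despite the ten failed ones.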
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID,
job, d, allocs, nil, "", 50, true)
r := reconciler.Compute()
updates := []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusSuccessful,
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
},
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
}
// Test that a failed deployment cancels non-promoted canaries
func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
ci.Parallel(t)
// Create a job with two task groups
job := mock.Job()
job.TaskGroups[0].Update = canaryUpdate
job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
job.TaskGroups[1].Name = "two"
// Create an existing failed deployment that has promoted one task group
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusFailed
s0 := &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 4,
}
s1 := &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
DesiredCanaries: 2,
PlacedAllocs: 2,
}
d.TaskGroups[job.TaskGroups[0].Name] = s0
d.TaskGroups[job.TaskGroups[1].Name] = s1
// Create the healthy replacements and remaining old-job allocations for both task groups
var allocs []*structs.Allocation
handled := make(map[string]allocUpdateType)
for _, group := range []int{0, 1} {
replacements := 4
state := s0
if group == 1 {
replacements = 2
state = s1
}
// Create the healthy replacements
for i := 0; i < replacements; i++ {
new := mock.Alloc()
new.Job = job
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
new.TaskGroup = job.TaskGroups[group].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, new)
handled[new.ID] = allocUpdateFnIgnore
// Add the alloc to the canary list
if i < 2 {
state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
}
}
for i := replacements; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
2017-07-05 19:50:40 +00:00
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[group].Name
allocs = append(allocs, alloc)
}
}
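// The first task group is promoted and left alone; the non-promoted group "two"
// should have its two canaries stopped.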
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
inplace: 0,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
job.TaskGroups[1].Name: {
Stop: 2,
Ignore: 8,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}
// Test that a failed deployment and updated job works
func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create an existing failed deployment that has some placed allocs
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusFailed
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
PlacedAllocs: 4,
}
// Create 6 allocations from the old job
var allocs []*structs.Allocation
for i := 4; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create the healthy replacements
for i := 0; i < 4; i++ {
new := mock.Alloc()
new.Job = job
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, new)
}
// Up the job version
jobNew := job.Copy()
jobNew.Version += 100
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
dnew := structs.NewDeployment(jobNew, 50)
dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: dnew,
deploymentUpdates: nil,
destructive: 4,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
DestructiveUpdate: 4,
Ignore: 6,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler marks a deployment as complete
func TestReconciler_MarkDeploymentComplete(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 10,
PlacedAllocs: 10,
HealthyAllocs: 10,
}
// Create allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
updates := []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusSuccessful,
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
},
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 10,
},
},
})
}
// Tests the reconciler handles a job change that scales up when a deployment
// already exists, i.e. this is the second eval for the change.
func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
ci.Parallel(t)
// Scale the job up to 30
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
job.TaskGroups[0].Count = 30
// Create a deployment that is paused and has placed some canaries
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 30,
PlacedAllocs: 20,
}
// Create 10 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
// Create 20 from new job
handled := make(map[string]allocUpdateType)
for i := 10; i < 30; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.DeploymentID = d.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
handled[alloc.ID] = allocUpdateFnIgnore
}
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
// All should be ignored because nothing has been marked as
// healthy.
Ignore: 30,
},
},
})
}
// Tests the reconciler doesn't stop allocations when doing a rolling upgrade
// where the count of the old job allocs is < desired count.
func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
// Create 7 allocations from the old job
var allocs []*structs.Allocation
for i := 0; i < 7; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
allocs = append(allocs, alloc)
}
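// Only 7 of the desired 10 allocs exist; the reconciler should place the 3
// missing ones and use a remaining update slot for one destructive update
// rather than stopping anything.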
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
d := structs.NewDeployment(job, 50)
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
DesiredTotal: 10,
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: d,
deploymentUpdates: nil,
place: 3,
destructive: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 3,
DestructiveUpdate: 1,
Ignore: 6,
},
},
})
assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests that the reconciler handles rerunning a batch job in the case that the
// allocations are from an older instance of the job.
func TestReconciler_Batch_Rerun(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.Type = structs.JobTypeBatch
job.TaskGroups[0].Update = nil
// Create 10 allocations from the old job and have them be complete
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.ClientStatus = structs.AllocClientStatusComplete
alloc.DesiredStatus = structs.AllocDesiredStatusStop
allocs = append(allocs, alloc)
}
// Create a copy of the job that is "new"
job2 := job.Copy()
job2.CreateIndex++
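// The bumped CreateIndex makes job2 a new instance of the batch job, so the
// completed allocs from the old instance should be ignored and all 10 placed again.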
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 10,
destructive: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 10,
DestructiveUpdate: 0,
Ignore: 10,
},
},
})
assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}
// Test that a failed deployment will not result in rescheduling failed allocations
func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
tgName := job.TaskGroups[0].Name
now := time.Now()
// Create an existing failed deployment that has some placed allocs
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusFailed
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: true,
DesiredTotal: 5,
PlacedAllocs: 4,
}
// Create 4 allocations and mark two as failed
var allocs []*structs.Allocation
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
allocs = append(allocs, alloc)
}
// Mark two of the allocations as failed and reschedulable now
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[3].ClientStatus = structs.AllocClientStatusFailed
allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert that no rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 0,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Ignore: 2,
},
},
})
}
// Test that a running deployment with failed allocs will not result in
// rescheduling failed allocations unless they are marked as reschedulable.
func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
tgName := job.TaskGroups[0].Name
now := time.Now()
// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusRunning
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
PlacedAllocs: 10,
}
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs = append(allocs, alloc)
}
// Mark half of them as reschedulable
for i := 0; i < 5; i++ {
allocs[i].DesiredTransition.Reschedule = pointer.Of(true)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert that no rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 5,
stop: 5,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 5,
Stop: 5,
Ignore: 5,
},
},
})
}
// Test that a failed deployment cancels non-promoted canaries
func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
ci.Parallel(t)
// Create a job
job := mock.Job()
job.TaskGroups[0].Count = 3
job.TaskGroups[0].Update = &structs.UpdateStrategy{
Canary: 3,
MaxParallel: 2,
HealthCheck: structs.UpdateStrategyHealthCheck_Checks,
MinHealthyTime: 10 * time.Second,
HealthyDeadline: 10 * time.Minute,
Stagger: 31 * time.Second,
}
// Create v1 of the job
jobv1 := job.Copy()
jobv1.Version = 1
jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
// Create v2 of the job
jobv2 := job.Copy()
jobv2.Version = 2
jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
d := structs.NewDeployment(jobv2, 50)
state := &structs.DeploymentState{
Promoted: true,
DesiredTotal: 3,
PlacedAllocs: 3,
HealthyAllocs: 3,
}
d.TaskGroups[job.TaskGroups[0].Name] = state
// Create 3 healthy, running allocations from v2 that belong to the deployment
var allocs []*structs.Allocation
for i := 0; i < 3; i++ {
new := mock.Alloc()
new.Job = jobv2
new.JobID = job.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = d.ID
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(true),
}
new.ClientStatus = structs.AllocClientStatusRunning
allocs = append(allocs, new)
}
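// Create 3 allocations from v1 that are already stopped and failed, tied to a
// different deployment.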
for i := 0; i < 3; i++ {
new := mock.Alloc()
new.Job = jobv1
new.JobID = jobv1.ID
new.NodeID = uuid.Generate()
new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
new.TaskGroup = job.TaskGroups[0].Name
new.DeploymentID = uuid.Generate()
new.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: pointer.Of(false),
}
new.DesiredStatus = structs.AllocDesiredStatusStop
new.ClientStatus = structs.AllocClientStatusFailed
allocs = append(allocs, new)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
updates := []*structs.DeploymentStatusUpdate{
{
DeploymentID: d.ID,
Status: structs.DeploymentStatusSuccessful,
StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
},
}
// Assert the correct results
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: updates,
place: 0,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Stop: 0,
InPlaceUpdate: 0,
Ignore: 3,
},
},
})
}
// Test that a successful deployment with failed allocs will result in
// rescheduling failed allocations
func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
ci.Parallel(t)
job := mock.Job()
job.TaskGroups[0].Update = noCanaryUpdate
tgName := job.TaskGroups[0].Name
now := time.Now()
// Mock a deployment that has already been marked successful but still has failed allocs
d := structs.NewDeployment(job, 50)
d.Status = structs.DeploymentStatusSuccessful
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
Promoted: false,
DesiredTotal: 10,
PlacedAllocs: 10,
}
// Create 10 allocations
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.TaskGroup = job.TaskGroups[0].Name
alloc.DeploymentID = d.ID
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs = append(allocs, alloc)
}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job,
d, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Assert that rescheduled placements were created
assertResults(t, r, &resultExpectation{
place: 10,
stop: 10,
createDeployment: nil,
deploymentUpdates: nil,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 10,
Stop: 10,
Ignore: 0,
},
},
})
assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
}
// Tests force rescheduling a failed alloc that is past its reschedule limit
func TestReconciler_ForceReschedule_Service(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 1,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = noCanaryUpdate
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark one as failed and past its reschedule limit so it is not eligible to reschedule
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
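// With the single allowed attempt already used within the 24h interval, this
// alloc is past its reschedule limit and would normally be left alone.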
// Mark DesiredTransition ForceReschedule
allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: pointer.Of(true)}
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// Verify that one rescheduled alloc was created because of the forced reschedule
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
stop: 1,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Stop: 1,
Ignore: 4,
},
},
})
// Rescheduled allocs should have previous allocs
assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests behavior of service failure with rescheduling policy preventing rescheduling:
// new allocs should be placed to satisfy the job count, and current allocations are
// left unmodified
func TestReconciler_RescheduleNot_Service(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 5
job := mock.Job()
job.TaskGroups[0].Count = 5
tgName := job.TaskGroups[0].Name
now := time.Now()
// Set up reschedule policy and update block
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "",
MaxDelay: 1 * time.Hour,
Unlimited: false,
}
job.TaskGroups[0].Update = noCanaryUpdate
// Create 5 existing allocations
var allocs []*structs.Allocation
for i := 0; i < 5; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark two as failed
allocs[0].ClientStatus = structs.AllocClientStatusFailed
// Mark one of them as already rescheduled once
allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: uuid.Generate(),
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-10 * time.Second)}}
allocs[1].ClientStatus = structs.AllocClientStatusFailed
// Mark one as desired state stop
allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nil, "", 50, true)
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// No rescheduling: ignore all 4 remaining allocs, but place one new alloc to
// replace allocs[4], which was stopped explicitly
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 1,
inplace: 0,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 1,
Ignore: 4,
Stop: 0,
},
},
})
// None of the placements should have previous allocs or be marked as rescheduled
assertPlaceResultsHavePreviousAllocs(t, 0, r.place)
assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests behavior of batch failure with rescheduling policy preventing rescheduling:
// current allocations are left unmodified and no follow up
func TestReconciler_RescheduleNot_Batch(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// Set desired 4
job := mock.Job()
job.TaskGroups[0].Count = 4
now := time.Now()
// Set up reschedule policy
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
Attempts: 0,
Interval: 24 * time.Hour,
Delay: 5 * time.Second,
DelayFunction: "constant",
}
tgName := job.TaskGroups[0].Name
// Create 6 existing allocations - 2 running, 1 complete and 3 failed
var allocs []*structs.Allocation
for i := 0; i < 6; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
allocs = append(allocs, alloc)
alloc.ClientStatus = structs.AllocClientStatusRunning
}
// Mark 3 as failed with reschedule tracking info
allocs[0].ClientStatus = structs.AllocClientStatusFailed
allocs[0].NextAllocation = allocs[1].ID
allocs[1].ClientStatus = structs.AllocClientStatusFailed
allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
}}
allocs[1].NextAllocation = allocs[2].ID
allocs[2].ClientStatus = structs.AllocClientStatusFailed
allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
StartedAt: now.Add(-1 * time.Hour),
FinishedAt: now.Add(-5 * time.Second)}}
allocs[2].FollowupEvalID = uuid.Generate()
allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[0].ID,
PrevNodeID: uuid.Generate(),
},
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
PrevAllocID: allocs[1].ID,
PrevNodeID: uuid.Generate(),
},
}}
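// allocs[0] -> allocs[1] -> allocs[2] form a reschedule chain; with Attempts=0
// the policy allows no further rescheduling.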
// Mark one as complete
allocs[5].ClientStatus = structs.AllocClientStatusComplete
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job,
nil, allocs, nil, "", 50, true)
reconciler.now = now
r := reconciler.Compute()
// Verify that no follow up evals were created
evals := r.desiredFollowupEvals[tgName]
require.Nil(evals)
// No reschedule attempts were made and all allocs are untouched
assertResults(t, r, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 0,
stop: 0,
inplace: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 0,
Stop: 0,
Ignore: 4,
},
},
})
}
// Tests that when a node disconnects, running allocations are queued to transition to unknown.
func TestReconciler_Node_Disconnect_Updates_Alloc_To_Unknown(t *testing.T) {
job, allocs := buildResumableAllocations(3, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2)
// Build a map of disconnected nodes
nodes := buildDisconnectedNodes(allocs, 2)
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, allocs, nodes, "", 50, true)
reconciler.now = time.Now().UTC()
results := reconciler.Compute()
// Verify that 1 follow up eval was created with the values we expect.
evals := results.desiredFollowupEvals[job.TaskGroups[0].Name]
require.Len(t, evals, 1)
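// The follow-up eval should be delayed by the disconnect window; the 5 minute
// offset below assumes buildResumableAllocations configures a 5m max client
// disconnect for the group.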
expectedTime := reconciler.now.Add(5 * time.Minute)
eval := evals[0]
require.NotNil(t, eval.WaitUntil)
require.Equal(t, expectedTime, eval.WaitUntil)
// Validate that the queued disconnectUpdates have the right client status,
// and that they have a valid FollowupEvalID.
for _, disconnectUpdate := range results.disconnectUpdates {
require.Equal(t, structs.AllocClientStatusUnknown, disconnectUpdate.ClientStatus)
require.NotEmpty(t, disconnectUpdate.FollowupEvalID)
require.Equal(t, eval.ID, disconnectUpdate.FollowupEvalID)
}
// 2 to place, 2 to update, 1 to ignore
assertResults(t, results, &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
stop: 0,
inplace: 0,
disconnectUpdates: 2,
// 2 to place and 1 to ignore
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
Place: 2,
Stop: 0,
Ignore: 1,
InPlaceUpdate: 0,
},
},
})
}
func TestReconciler_Disconnect_UpdateJobAfterReconnect(t *testing.T) {
ci.Parallel(t)
// Create 2 allocs and simulate one having been previously disconnected and
// then reconnected.
job, allocs := buildResumableAllocations(2, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2)
allocs[0].AllocStates = []*structs.AllocState{
{
Field: structs.AllocStateFieldClientStatus,
Value: structs.AllocClientStatusUnknown,
Time: time.Now().Add(-5 * time.Minute),
},
{
Field: structs.AllocStateFieldClientStatus,
Value: structs.AllocClientStatusRunning,
Time: time.Now(),
},
}
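// The final AllocState entry records the reconnect, so the alloc should be
// treated as running again and receive the same in-place update as its peer.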
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job,
nil, allocs, nil, "", 50, true)
results := reconciler.Compute()
// Assert both allocations will be updated.
assertResults(t, results, &resultExpectation{
inplace: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
job.TaskGroups[0].Name: {
InPlaceUpdate: 2,
},
},
})
}
// Tests that when a node disconnects/reconnects, allocations for that node are
// reconciled according to the business rules.
func TestReconciler_Disconnected_Client(t *testing.T) {
disconnectAllocState := []*structs.AllocState{{
Field: structs.AllocStateFieldClientStatus,
Value: structs.AllocClientStatusUnknown,
Time: time.Now(),
}}
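// disconnectAllocState marks an allocation as having transitioned to the
// unknown client status, i.e. its node disconnected.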
type testCase struct {
name string
allocCount int
disconnectedAllocCount int
jobVersionIncrement uint64
nodeScoreIncrement float64
disconnectedAllocStatus string
disconnectedAllocStates []*structs.AllocState
isBatch bool
nodeStatusDisconnected bool
replace bool
failReplacement bool
taintReplacement bool
disconnectReplacement bool
replaceFailedReplacement bool
shouldStopOnDisconnectedNode bool
maxDisconnect *time.Duration
expected *resultExpectation
}
testCases := []testCase{
{
name: "reconnect-original-no-replacement",
allocCount: 2,
replace: false,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: false,
expected: &resultExpectation{
reconnectUpdates: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
},
},
},
},
{
name: "resume-original-and-stop-replacement",
allocCount: 3,
replace: true,
disconnectedAllocCount: 1,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: false,
expected: &resultExpectation{
stop: 1,
reconnectUpdates: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 1,
Ignore: 3,
},
},
},
},
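		// The replacement scored higher on its node (nodeScoreIncrement), so
		// the reconnecting original is stopped.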
{
name: "stop-original-with-lower-node-score",
allocCount: 4,
replace: true,
disconnectedAllocCount: 1,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
nodeScoreIncrement: 1,
expected: &resultExpectation{
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 1,
Ignore: 4,
},
},
},
},
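		// Allocs that reconnect in a failed state are stopped; their
		// replacements keep running.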
{
name: "stop-original-failed-on-reconnect",
allocCount: 4,
replace: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusFailed,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
expected: &resultExpectation{
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Ignore: 4,
},
},
},
},
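		// Failed allocs on the disconnected node were never replaced, so they
		// are stopped and rescheduled.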
{
name: "reschedule-original-failed-if-not-replaced",
allocCount: 4,
replace: false,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusFailed,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
expected: &resultExpectation{
stop: 2,
place: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
Place: 2,
Stop: 2,
},
},
},
},
{
name: "ignore-reconnect-completed",
allocCount: 2,
replace: false,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusComplete,
disconnectedAllocStates: disconnectAllocState,
isBatch: true,
expected: &resultExpectation{
place: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
Place: 2,
},
},
},
},
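		// The replacements already failed, so the reconnecting originals are
		// kept and no explicit stop is expected.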
{
name: "keep-original-alloc-and-stop-failed-replacement",
allocCount: 3,
replace: true,
failReplacement: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
expected: &resultExpectation{
reconnectUpdates: 2,
stop: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 5,
},
},
},
},
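		// The replacement is itself reconnecting, so it is stopped and the
		// original alloc is reconnected.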
{
name: "keep-original-and-stop-reconnecting-replacement",
allocCount: 2,
replace: true,
disconnectReplacement: true,
disconnectedAllocCount: 1,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
expected: &resultExpectation{
reconnectUpdates: 1,
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
Stop: 1,
},
},
},
},
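		// The replacements landed on tainted nodes, so they are stopped and
		// the originals are reconnected.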
{
name: "keep-original-and-stop-tainted-replacement",
allocCount: 3,
replace: true,
taintReplacement: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
expected: &resultExpectation{
reconnectUpdates: 2,
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 3,
Stop: 2,
},
},
},
},
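		// The reconnecting allocs run an older job version, so they are
		// stopped in favor of their up-to-date replacements.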
{
name: "stop-original-alloc-with-old-job-version",
allocCount: 5,
replace: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
jobVersionIncrement: 1,
expected: &resultExpectation{
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 5,
Stop: 2,
},
},
},
},
{
name: "stop-original-alloc-with-old-job-version-reconnect-eval",
allocCount: 5,
replace: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
jobVersionIncrement: 1,
expected: &resultExpectation{
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Ignore: 5,
},
},
},
},
{
name: "stop-original-alloc-with-old-job-version-and-failed-replacements-replaced",
allocCount: 5,
replace: true,
failReplacement: true,
replaceFailedReplacement: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: false,
jobVersionIncrement: 1,
expected: &resultExpectation{
stop: 2,
reconnectUpdates: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Ignore: 7,
},
},
},
},
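		// A pending alloc on a disconnected node is stopped in favor of its
		// replacement.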
{
name: "stop-original-pending-alloc-for-disconnected-node",
allocCount: 2,
replace: true,
disconnectedAllocCount: 1,
disconnectedAllocStatus: structs.AllocClientStatusPending,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
nodeStatusDisconnected: true,
expected: &resultExpectation{
stop: 1,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 1,
Ignore: 2,
},
},
},
},
{
name: "stop-failed-original-and-failed-replacements-and-place-new",
allocCount: 5,
replace: true,
failReplacement: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusFailed,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
expected: &resultExpectation{
stop: 2,
place: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Place: 2,
Ignore: 5,
},
},
},
},
{
name: "stop-expired-allocs",
allocCount: 5,
replace: true,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusUnknown,
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
nodeStatusDisconnected: true,
maxDisconnect: pointer.Of(2 * time.Second),
expected: &resultExpectation{
stop: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Ignore: 5,
},
},
},
},
{
name: "replace-allocs-on-disconnected-node",
allocCount: 5,
replace: false,
disconnectedAllocCount: 2,
disconnectedAllocStatus: structs.AllocClientStatusRunning,
disconnectedAllocStates: []*structs.AllocState{},
nodeStatusDisconnected: true,
expected: &resultExpectation{
place: 2,
disconnectUpdates: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Place: 2,
Ignore: 3,
},
},
},
},
}
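// Each case builds a set of running allocs, marks some of them as
// disconnected on the node under test, optionally places replacements on
// other nodes, and then asserts the reconciler's stop, place, disconnect,
// and reconnect decisions.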
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
require.NotEqual(t, 0, tc.allocCount, "invalid test case: alloc count must be greater than zero")
testNode := mock.Node()
if tc.nodeStatusDisconnected {
testNode.Status = structs.NodeStatusDisconnected
}
// Create resumable allocs
job, allocs := buildResumableAllocations(tc.allocCount, structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun, 2)
origAllocs := set.New[string](len(allocs))
for _, alloc := range allocs {
origAllocs.Insert(alloc.ID)
}
if tc.isBatch {
job.Type = structs.JobTypeBatch
}
// Set alloc state
disconnectedAllocCount := tc.disconnectedAllocCount
for _, alloc := range allocs {
alloc.DesiredStatus = structs.AllocDesiredStatusRun
if tc.maxDisconnect != nil {
alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect
}
if disconnectedAllocCount > 0 {
alloc.ClientStatus = tc.disconnectedAllocStatus
alloc.AllocStates = tc.disconnectedAllocStates
// Set the node id on all the disconnected allocs to the node under test.
alloc.NodeID = testNode.ID
alloc.NodeName = "disconnected"
disconnectedAllocCount--
}
}
// Place the allocs on another node.
if tc.replace {
replacements := make([]*structs.Allocation, 0)
for _, alloc := range allocs {
if alloc.NodeID != testNode.ID {
continue
}
replacement := alloc.Copy()
replacement.ID = uuid.Generate()
replacement.NodeID = uuid.Generate()
replacement.ClientStatus = structs.AllocClientStatusRunning
replacement.PreviousAllocation = alloc.ID
replacement.AllocStates = nil
replacement.TaskStates = nil
replacement.CreateIndex += 1
alloc.NextAllocation = replacement.ID
if tc.jobVersionIncrement != 0 {
replacement.Job.Version = replacement.Job.Version + tc.jobVersionIncrement
}
if tc.nodeScoreIncrement != 0 {
replacement.Metrics.ScoreMetaData[0].NormScore = replacement.Metrics.ScoreMetaData[0].NormScore + tc.nodeScoreIncrement
}
if tc.taintReplacement {
replacement.DesiredTransition.Migrate = pointer.Of(true)
}
if tc.disconnectReplacement {
replacement.AllocStates = tc.disconnectedAllocStates
}
// If we want to test intermediate replacement failures, simulate that.
if tc.failReplacement {
replacement.ClientStatus = structs.AllocClientStatusFailed
if tc.replaceFailedReplacement {
nextReplacement := replacement.Copy()
nextReplacement.ID = uuid.Generate()
nextReplacement.ClientStatus = structs.AllocClientStatusRunning
nextReplacement.DesiredStatus = structs.AllocDesiredStatusRun
nextReplacement.PreviousAllocation = replacement.ID
nextReplacement.CreateIndex += 1
replacement.NextAllocation = nextReplacement.ID
replacement.DesiredStatus = structs.AllocDesiredStatusStop
replacements = append(replacements, nextReplacement)
}
}
replacements = append(replacements, replacement)
}
allocs = append(allocs, replacements...)
}
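// Run the reconciler with no deployment, passing the test node as the only
// entry in the tainted-node map.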
reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, tc.isBatch, job.ID, job,
nil, allocs, map[string]*structs.Node{testNode.ID: testNode}, "", 50, true)
reconciler.now = time.Now()
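// If the test case sets a max_client_disconnect, advance the reconciler's
// notion of "now" far past that window so disconnected allocs are treated
// as expired.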
if tc.maxDisconnect != nil {
reconciler.now = time.Now().Add(*tc.maxDisconnect * 20)
}
results := reconciler.Compute()
assertResults(t, results, tc.expected)
for _, stopResult := range results.stop {
// Skip replacement allocs.
if !origAllocs.Contains(stopResult.alloc.ID) {
continue
}
if tc.shouldStopOnDisconnectedNode {
require.Equal(t, testNode.ID, stopResult.alloc.NodeID)
} else {
require.NotEqual(t, testNode.ID, stopResult.alloc.NodeID)
}
require.Equal(t, job.Version, stopResult.alloc.Job.Version)
}
})
}
}

// Tests that a client disconnect while a canary is in progress generates
// the expected results.
func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
disconnectAllocState := []*structs.AllocState{{
Field: structs.AllocStateFieldClientStatus,
Value: structs.AllocClientStatusUnknown,
Time: time.Now(),
}}
type testCase struct {
name string
nodes []string
deploymentState *structs.DeploymentState
deployedAllocs map[*structs.Node][]*structs.Allocation
canaryAllocs map[*structs.Node][]*structs.Allocation
expectedResult *resultExpectation
}
running := structs.AllocClientStatusRunning
complete := structs.AllocClientStatusComplete
unknown := structs.AllocClientStatusUnknown
pending := structs.AllocClientStatusPending
run := structs.AllocDesiredStatusRun
stop := structs.AllocDesiredStatusStop
maxClientDisconnect := 10 * time.Minute
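// One node stays ready and one is disconnected so allocs can be split
// across both states.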
readyNode := mock.Node()
readyNode.Name = "ready-" + readyNode.ID
readyNode.Status = structs.NodeStatusReady
disconnectedNode := mock.Node()
disconnectedNode.Name = "disconnected-" + disconnectedNode.ID
disconnectedNode.Status = structs.NodeStatusDisconnected
// Job with allocations and max_client_disconnect
job := mock.Job()
updatedJob := job.Copy()
updatedJob.Version = updatedJob.Version + 1
testCases := []testCase{
{
name: "3-placed-1-disconnect",
deploymentState: &structs.DeploymentState{
AutoRevert: false,
AutoPromote: false,
Promoted: false,
ProgressDeadline: 5 * time.Minute,
RequireProgressBy: time.Now().Add(5 * time.Minute),
PlacedCanaries: []string{},
DesiredCanaries: 1,
DesiredTotal: 6,
PlacedAllocs: 3,
HealthyAllocs: 2,
UnhealthyAllocs: 0,
},
deployedAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// filtered as terminal
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
// Ignored
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
// destructive, but discarded because canarying
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
},
disconnectedNode: {
// filtered as terminal
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
// Gets a placement, and a disconnect update
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
// Gets a placement, and a disconnect update
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
},
},
canaryAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// Ignored
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
// Ignored
{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
},
disconnectedNode: {
// Gets a placement, and a disconnect update
{Name: "my-job.web[1]", ClientStatus: running, DesiredStatus: run},
},
},
expectedResult: &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 3,
destructive: 0,
stop: 0,
inplace: 0,
attributeUpdates: 0,
disconnectUpdates: 3,
reconnectUpdates: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
updatedJob.TaskGroups[0].Name: {
Place: 3,
Canary: 0,
Ignore: 6,
},
},
},
},
{
name: "ignore-unknown",
deploymentState: &structs.DeploymentState{
AutoRevert: false,
AutoPromote: false,
Promoted: false,
ProgressDeadline: 5 * time.Minute,
RequireProgressBy: time.Now().Add(5 * time.Minute),
PlacedCanaries: []string{},
DesiredCanaries: 1,
DesiredTotal: 6,
PlacedAllocs: 3,
HealthyAllocs: 2,
UnhealthyAllocs: 0,
},
deployedAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// filtered as terminal
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
// Ignored
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
// destructive, but discarded because canarying
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
},
disconnectedNode: {
// filtered as terminal
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
// Gets a placement, and a disconnect update
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
// Gets a placement, and a disconnect update
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
},
},
canaryAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// Ignored
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
// Ignored
{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
},
disconnectedNode: {
// Ignored
{Name: "my-job.web[1]", ClientStatus: unknown, DesiredStatus: run},
},
},
expectedResult: &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
destructive: 0,
stop: 0,
inplace: 0,
attributeUpdates: 0,
disconnectUpdates: 2,
reconnectUpdates: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
updatedJob.TaskGroups[0].Name: {
Place: 2,
Canary: 0,
Ignore: 7,
},
},
},
},
{
name: "4-placed-2-pending-lost",
deploymentState: &structs.DeploymentState{
AutoRevert: false,
AutoPromote: false,
Promoted: false,
ProgressDeadline: 5 * time.Minute,
RequireProgressBy: time.Now().Add(5 * time.Minute),
PlacedCanaries: []string{},
DesiredCanaries: 2,
DesiredTotal: 6,
PlacedAllocs: 4,
HealthyAllocs: 2,
UnhealthyAllocs: 0,
},
deployedAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// filtered as terminal
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
// filtered as terminal
{Name: "my-job.web[2]", ClientStatus: complete, DesiredStatus: stop},
// destructive, but discarded because canarying
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
},
disconnectedNode: {
// filtered as terminal
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
// Gets a placement, and a disconnect update
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
// Gets a placement, and a disconnect update
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
},
},
canaryAllocs: map[*structs.Node][]*structs.Allocation{
readyNode: {
// Ignored
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
// Ignored
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: run},
},
disconnectedNode: {
// Stop/Lost because pending
{Name: "my-job.web[1]", ClientStatus: pending, DesiredStatus: run},
// Stop/Lost because pending
{Name: "my-job.web[3]", ClientStatus: pending, DesiredStatus: run},
},
},
expectedResult: &resultExpectation{
createDeployment: nil,
deploymentUpdates: nil,
place: 2,
destructive: 0,
stop: 2,
inplace: 0,
attributeUpdates: 0,
disconnectUpdates: 2,
reconnectUpdates: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
updatedJob.TaskGroups[0].Name: {
Place: 2,
Canary: 0,
Ignore: 6,
// The 2 stops in this test are transient failures, but
// the deployment can still progress. We don't include
// them in the stop count since DesiredTGUpdates is used
// to report deployment progress or final deployment state.
Stop: 0,
},
},
},
},
}
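// Each case attaches the deployed and canary allocs to their nodes, builds
// a deployment mid-canary, and asserts how the reconciler handles the
// allocs on the disconnected node.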
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Set the count dynamically to the number from the original deployment.
job.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
job.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
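// Both the original and updated job use the same canary update strategy so
// the reconciler evaluates each case while a canary deployment is active.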
job.TaskGroups[0].Update = &structs.UpdateStrategy{
MaxParallel: 1,
Canary: tc.deploymentState.DesiredCanaries,
MinHealthyTime: 3 * time.Second,
HealthyDeadline: 20 * time.Second,
AutoRevert: true,
AutoPromote: true,
}
updatedJob.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
updatedJob.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
updatedJob.TaskGroups[0].Update = &structs.UpdateStrategy{
MaxParallel: 1,
Canary: tc.deploymentState.DesiredCanaries,
MinHealthyTime: 3 * time.Second,
HealthyDeadline: 20 * time.Second,
AutoRevert: true,
AutoPromote: true,
}
// Populate alloc IDs, node IDs, and the job on deployed allocs
allocsConfigured := 0
for node, allocs := range tc.deployedAllocs {
for _, alloc := range allocs {
alloc.ID = uuid.Generate()
alloc.NodeID = node.ID
alloc.NodeName = node.Name
alloc.JobID = job.ID
alloc.Job = job
alloc.TaskGroup = job.TaskGroups[0].Name
allocsConfigured++
}
}
require.Equal(t, tc.deploymentState.DesiredTotal, allocsConfigured, "invalid alloc configuration: expect %d got %d", tc.deploymentState.DesiredTotal, allocsConfigured)
// Populate alloc IDs, node IDs, and the job on canaries
canariesConfigured := 0
handled := make(map[string]allocUpdateType)
for node, allocs := range tc.canaryAllocs {
for _, alloc := range allocs {
alloc.ID = uuid.Generate()
alloc.NodeID = node.ID
alloc.NodeName = node.Name
alloc.JobID = updatedJob.ID
alloc.Job = updatedJob
alloc.TaskGroup = updatedJob.TaskGroups[0].Name
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
}
if alloc.ClientStatus == structs.AllocClientStatusRunning {
alloc.DeploymentStatus.Healthy = pointer.Of(true)
}
if alloc.ClientStatus == structs.AllocClientStatusUnknown {
alloc.AllocStates = disconnectAllocState
alloc.FollowupEvalID = "eval-where-it-was-set-to-unknown"
}
tc.deploymentState.PlacedCanaries = append(tc.deploymentState.PlacedCanaries, alloc.ID)
handled[alloc.ID] = allocUpdateFnIgnore
canariesConfigured++
}
}
// Validate tc.canaryAllocs against tc.deploymentState
require.Equal(t, tc.deploymentState.PlacedAllocs, canariesConfigured, "invalid canary configuration: expect %d got %d", tc.deploymentState.PlacedAllocs, canariesConfigured)
deployment := structs.NewDeployment(updatedJob, 50)
deployment.TaskGroups[updatedJob.TaskGroups[0].Name] = tc.deploymentState
// Build a map of tainted nodes that contains the disconnected node hosting the last canary
tainted := make(map[string]*structs.Node, 1)
tainted[disconnectedNode.ID] = disconnectedNode
allocs := make([]*structs.Allocation, 0)
allocs = append(allocs, tc.deployedAllocs[readyNode]...)
allocs = append(allocs, tc.deployedAllocs[disconnectedNode]...)
allocs = append(allocs, tc.canaryAllocs[readyNode]...)
allocs = append(allocs, tc.canaryAllocs[disconnectedNode]...)
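// Canaries registered in the handled map are ignored by the update fn;
// every other alloc for the old job version falls through to a destructive
// update.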
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob,
deployment, allocs, tainted, "", 50, true)
result := reconciler.Compute()
// Assert the correct results
assertResults(t, result, tc.expectedResult)
// Validate that placements are either for placed canaries for the
// updated job, or for disconnected allocs for the original job
// and that they have a disconnect update.
for _, placeResult := range result.place {
found := false
require.NotNil(t, placeResult.previousAlloc)
for _, deployed := range tc.deployedAllocs[disconnectedNode] {
if deployed.ID == placeResult.previousAlloc.ID {
found = true
require.Equal(t, job.Version, placeResult.previousAlloc.Job.Version)
require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
require.True(t, exists)
break
}
}
for _, canary := range tc.canaryAllocs[disconnectedNode] {
if canary.ID == placeResult.previousAlloc.ID {
found = true
require.Equal(t, updatedJob.Version, placeResult.previousAlloc.Job.Version)
require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
require.True(t, exists)
break
}
}
require.True(t, found)
}
// Validate that stops are for pending disconnects
for _, stopResult := range result.stop {
require.Equal(t, pending, stopResult.alloc.ClientStatus)
}
})
}
}

// Tests the reconciler properly handles the logic for computeDeploymentPaused
// for various job types.
func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
ci.Parallel(t)
type testCase struct {
name string
jobType string
isMultiregion bool
isPeriodic bool
isParameterized bool
expected bool
}
multiregionCfg := mock.MultiregionJob().Multiregion
periodicCfg := mock.PeriodicJob().Periodic
parameterizedCfg := &structs.ParameterizedJobConfig{
Payload: structs.DispatchPayloadRequired,
}
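// Shared multiregion, periodic, and parameterized config fragments that the
// cases below toggle on the mock job.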
testCases := []testCase{
{
name: "single region service is not paused",
jobType: structs.JobTypeService,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion service is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
expected: true,
},
{
name: "single region batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion parameterized batch is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: true,
isPeriodic: false,
isParameterized: true,
expected: false,
},
{
name: "multiregion periodic batch is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: true,
isPeriodic: true,
isParameterized: false,
expected: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var job *structs.Job
if tc.jobType == structs.JobTypeService {
job = mock.Job()
} else if tc.jobType == structs.JobTypeBatch {
job = mock.BatchJob()
}
require.NotNil(t, job, "invalid job type", tc.jobType)
var deployment *structs.Deployment
if tc.isMultiregion {
job.Multiregion = multiregionCfg
// This deployment is created by the Job.Register RPC and
// fetched by the scheduler before handing it to the
// reconciler.
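				// It starts in the "initializing" status to signal that the
				// scheduler still needs to compute the initial deployment
				// state before the deployment transitions to "pending".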
if job.UsesDeployments() {
deployment = structs.NewDeployment(job, 100)
deployment.Status = structs.DeploymentStatusInitializing
deployment.StatusDescription = structs.DeploymentStatusDescriptionPendingForPeer
}
}
if tc.isPeriodic {
job.Periodic = periodicCfg
}
if tc.isParameterized {
job.ParameterizedJob = parameterizedCfg
}
reconciler := NewAllocReconciler(
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, deployment,
nil, nil, "", job.Priority, true)
_ = reconciler.Compute()
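			// Only a multiregion deployment that is waiting on its peer
			// regions should be reported as paused; periodic and
			// parameterized batch configurations must not be.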
require.Equal(t, tc.expected, reconciler.deploymentPaused)
})
}
}