b63944b5c1
Bumping compile time requirement to go 1.18 allows us to simplify our pointer helper methods.
935 lines
31 KiB
Go
935 lines
31 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/nomad/ci"
|
|
"github.com/hashicorp/nomad/helper/pointer"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// Test that we properly create the bitmap even when the alloc set includes an
|
|
// allocation with a higher count than the current min count and it is byte
|
|
// aligned.
|
|
// Ensure no regression from: https://github.com/hashicorp/nomad/issues/3008
|
|
func TestBitmapFrom(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
input := map[string]*structs.Allocation{
|
|
"8": {
|
|
JobID: "foo",
|
|
TaskGroup: "bar",
|
|
Name: "foo.bar[8]",
|
|
},
|
|
}
|
|
b := bitmapFrom(input, 1)
|
|
exp := uint(16)
|
|
if act := b.Size(); act != exp {
|
|
t.Fatalf("got %d; want %d", act, exp)
|
|
}
|
|
|
|
b = bitmapFrom(input, 8)
|
|
if act := b.Size(); act != exp {
|
|
t.Fatalf("got %d; want %d", act, exp)
|
|
}
|
|
}
|
|
|
|
func TestAllocSet_filterByTainted(t *testing.T) {
|
|
ci.Parallel(t)
|
|
|
|
nodes := map[string]*structs.Node{
|
|
"draining": {
|
|
ID: "draining",
|
|
DrainStrategy: mock.DrainNode().DrainStrategy,
|
|
},
|
|
"lost": {
|
|
ID: "lost",
|
|
Status: structs.NodeStatusDown,
|
|
},
|
|
"nil": nil,
|
|
"normal": {
|
|
ID: "normal",
|
|
Status: structs.NodeStatusReady,
|
|
},
|
|
"disconnected": {
|
|
ID: "disconnected",
|
|
Status: structs.NodeStatusDisconnected,
|
|
},
|
|
}
|
|
|
|
testJob := mock.Job()
|
|
testJob.TaskGroups[0].MaxClientDisconnect = pointer.Of(5 * time.Second)
|
|
now := time.Now()
|
|
|
|
testJobNoMaxDisconnect := mock.Job()
|
|
testJobNoMaxDisconnect.TaskGroups[0].MaxClientDisconnect = nil
|
|
|
|
unknownAllocState := []*structs.AllocState{{
|
|
Field: structs.AllocStateFieldClientStatus,
|
|
Value: structs.AllocClientStatusUnknown,
|
|
Time: now,
|
|
}}
|
|
|
|
expiredAllocState := []*structs.AllocState{{
|
|
Field: structs.AllocStateFieldClientStatus,
|
|
Value: structs.AllocClientStatusUnknown,
|
|
Time: now.Add(-60 * time.Second),
|
|
}}
|
|
|
|
reconnectedEvent := structs.NewTaskEvent(structs.TaskClientReconnected)
|
|
reconnectedEvent.Time = time.Now().UnixNano()
|
|
reconnectTaskState := map[string]*structs.TaskState{
|
|
testJob.TaskGroups[0].Tasks[0].Name: {
|
|
Events: []*structs.TaskEvent{reconnectedEvent},
|
|
},
|
|
}
|
|
|
|
type testCase struct {
|
|
name string
|
|
all allocSet
|
|
taintedNodes map[string]*structs.Node
|
|
supportsDisconnectedClients bool
|
|
skipNilNodeTest bool
|
|
now time.Time
|
|
// expected results
|
|
untainted allocSet
|
|
migrate allocSet
|
|
lost allocSet
|
|
disconnecting allocSet
|
|
reconnecting allocSet
|
|
ignore allocSet
|
|
}
|
|
|
|
testCases := []testCase{
|
|
// These two cases test that we maintain parity with pre-disconnected-clients behavior.
|
|
{
|
|
name: "lost-client",
|
|
supportsDisconnectedClients: false,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
"untainted1": {
|
|
ID: "untainted1",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
},
|
|
// Terminal allocs are always untainted
|
|
"untainted2": {
|
|
ID: "untainted2",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
},
|
|
// Terminal allocs are always untainted, even on draining nodes
|
|
"untainted3": {
|
|
ID: "untainted3",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "draining",
|
|
},
|
|
// Terminal allocs are always untainted, even on lost nodes
|
|
"untainted4": {
|
|
ID: "untainted4",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
// Non-terminal alloc with migrate=true should migrate on a draining node
|
|
"migrating1": {
|
|
ID: "migrating1",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)},
|
|
Job: testJob,
|
|
NodeID: "draining",
|
|
},
|
|
// Non-terminal alloc with migrate=true should migrate on an unknown node
|
|
"migrating2": {
|
|
ID: "migrating2",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)},
|
|
Job: testJob,
|
|
NodeID: "nil",
|
|
},
|
|
},
|
|
untainted: allocSet{
|
|
"untainted1": {
|
|
ID: "untainted1",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
},
|
|
// Terminal allocs are always untainted
|
|
"untainted2": {
|
|
ID: "untainted2",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
},
|
|
// Terminal allocs are always untainted, even on draining nodes
|
|
"untainted3": {
|
|
ID: "untainted3",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "draining",
|
|
},
|
|
// Terminal allocs are always untainted, even on lost nodes
|
|
"untainted4": {
|
|
ID: "untainted4",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
},
|
|
migrate: allocSet{
|
|
// Non-terminal alloc with migrate=true should migrate on a draining node
|
|
"migrating1": {
|
|
ID: "migrating1",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)},
|
|
Job: testJob,
|
|
NodeID: "draining",
|
|
},
|
|
// Non-terminal alloc with migrate=true should migrate on an unknown node
|
|
"migrating2": {
|
|
ID: "migrating2",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredTransition: structs.DesiredTransition{Migrate: pointer.Of(true)},
|
|
Job: testJob,
|
|
NodeID: "nil",
|
|
},
|
|
},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{},
|
|
ignore: allocSet{},
|
|
lost: allocSet{},
|
|
},
|
|
{
|
|
name: "lost-client-only-tainted-nodes",
|
|
supportsDisconnectedClients: false,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
// The logic associated with this test case can only trigger if there
|
|
// is a tainted node. Therefore, testing with a nil node set produces
|
|
// false failures, so don't perform that test if in this case.
|
|
skipNilNodeTest: true,
|
|
all: allocSet{
|
|
// Non-terminal allocs on lost nodes are lost
|
|
"lost1": {
|
|
ID: "lost1",
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
// Non-terminal allocs on lost nodes are lost
|
|
"lost2": {
|
|
ID: "lost2",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
},
|
|
untainted: allocSet{},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{},
|
|
ignore: allocSet{},
|
|
lost: allocSet{
|
|
// Non-terminal allocs on lost nodes are lost
|
|
"lost1": {
|
|
ID: "lost1",
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
// Non-terminal allocs on lost nodes are lost
|
|
"lost2": {
|
|
ID: "lost2",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
Job: testJob,
|
|
NodeID: "lost",
|
|
},
|
|
},
|
|
},
|
|
|
|
{
|
|
name: "disco-client-disconnect-unset-max-disconnect",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: true,
|
|
all: allocSet{
|
|
// Non-terminal allocs on disconnected nodes w/o max-disconnect are lost
|
|
"lost-running": {
|
|
ID: "lost-running",
|
|
Name: "lost-running",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJobNoMaxDisconnect,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
},
|
|
untainted: allocSet{},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{},
|
|
ignore: allocSet{},
|
|
lost: allocSet{
|
|
"lost-running": {
|
|
ID: "lost-running",
|
|
Name: "lost-running",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJobNoMaxDisconnect,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
},
|
|
},
|
|
|
|
// Everything below this line tests the disconnected client mode.
|
|
{
|
|
name: "disco-client-untainted-reconnect-failed-and-replaced",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
"running-replacement": {
|
|
ID: "running-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
PreviousAllocation: "failed-original",
|
|
},
|
|
// Failed and replaced allocs on reconnected nodes
|
|
// that are still desired-running are reconnected so
|
|
// we can stop them
|
|
"failed-original": {
|
|
ID: "failed-original",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
untainted: allocSet{
|
|
"running-replacement": {
|
|
ID: "running-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
PreviousAllocation: "failed-original",
|
|
},
|
|
},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{
|
|
"failed-original": {
|
|
ID: "failed-original",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
ignore: allocSet{},
|
|
lost: allocSet{},
|
|
},
|
|
{
|
|
name: "disco-client-reconnecting-running-no-replacement",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
// Running allocs on reconnected nodes with no replacement are reconnecting.
|
|
// Node.UpdateStatus has already handled syncing client state so this
|
|
// should be a noop.
|
|
"reconnecting-running-no-replacement": {
|
|
ID: "reconnecting-running-no-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
untainted: allocSet{},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{
|
|
"reconnecting-running-no-replacement": {
|
|
ID: "reconnecting-running-no-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
ignore: allocSet{},
|
|
lost: allocSet{},
|
|
},
|
|
{
|
|
name: "disco-client-terminal",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
// Allocs on reconnected nodes that are complete are untainted
|
|
"untainted-reconnect-complete": {
|
|
ID: "untainted-reconnect-complete",
|
|
Name: "untainted-reconnect-complete",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
// Failed allocs on reconnected nodes are in reconnecting so that
|
|
// they be marked with desired status stop at the server.
|
|
"reconnecting-failed": {
|
|
ID: "reconnecting-failed",
|
|
Name: "reconnecting-failed",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
// Lost allocs on reconnected nodes don't get restarted
|
|
"untainted-reconnect-lost": {
|
|
ID: "untainted-reconnect-lost",
|
|
Name: "untainted-reconnect-lost",
|
|
ClientStatus: structs.AllocClientStatusLost,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
// Replacement allocs that are complete are untainted
|
|
"untainted-reconnect-complete-replacement": {
|
|
ID: "untainted-reconnect-complete-replacement",
|
|
Name: "untainted-reconnect-complete",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "untainted-reconnect-complete",
|
|
},
|
|
// Replacement allocs on reconnected nodes that are failed are untainted
|
|
"untainted-reconnect-failed-replacement": {
|
|
ID: "untainted-reconnect-failed-replacement",
|
|
Name: "untainted-reconnect-failed",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "reconnecting-failed",
|
|
},
|
|
// Lost replacement allocs on reconnected nodes don't get restarted
|
|
"untainted-reconnect-lost-replacement": {
|
|
ID: "untainted-reconnect-lost-replacement",
|
|
Name: "untainted-reconnect-lost",
|
|
ClientStatus: structs.AllocClientStatusLost,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "untainted-reconnect-lost",
|
|
},
|
|
},
|
|
untainted: allocSet{
|
|
"untainted-reconnect-complete": {
|
|
ID: "untainted-reconnect-complete",
|
|
Name: "untainted-reconnect-complete",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
"untainted-reconnect-lost": {
|
|
ID: "untainted-reconnect-lost",
|
|
Name: "untainted-reconnect-lost",
|
|
ClientStatus: structs.AllocClientStatusLost,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
"untainted-reconnect-complete-replacement": {
|
|
ID: "untainted-reconnect-complete-replacement",
|
|
Name: "untainted-reconnect-complete",
|
|
ClientStatus: structs.AllocClientStatusComplete,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "untainted-reconnect-complete",
|
|
},
|
|
"untainted-reconnect-failed-replacement": {
|
|
ID: "untainted-reconnect-failed-replacement",
|
|
Name: "untainted-reconnect-failed",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "reconnecting-failed",
|
|
},
|
|
"untainted-reconnect-lost-replacement": {
|
|
ID: "untainted-reconnect-lost-replacement",
|
|
Name: "untainted-reconnect-lost",
|
|
ClientStatus: structs.AllocClientStatusLost,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
PreviousAllocation: "untainted-reconnect-lost",
|
|
},
|
|
},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{
|
|
"reconnecting-failed": {
|
|
ID: "reconnecting-failed",
|
|
Name: "reconnecting-failed",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
ignore: allocSet{},
|
|
lost: allocSet{},
|
|
},
|
|
{
|
|
name: "disco-client-disconnect",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: true,
|
|
all: allocSet{
|
|
// Non-terminal allocs on disconnected nodes are disconnecting
|
|
"disconnect-running": {
|
|
ID: "disconnect-running",
|
|
Name: "disconnect-running",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
// Unknown allocs on disconnected nodes are ignored
|
|
"ignore-unknown": {
|
|
ID: "ignore-unknown",
|
|
Name: "ignore-unknown",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
},
|
|
// Unknown allocs on disconnected nodes are lost when expired
|
|
"lost-unknown": {
|
|
ID: "lost-unknown",
|
|
Name: "lost-unknown",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
// Pending allocs on disconnected nodes are lost
|
|
"lost-pending": {
|
|
ID: "lost-pending",
|
|
Name: "lost-pending",
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
// Expired allocs on reconnected clients are lost
|
|
// Pending allocs on disconnected nodes are lost
|
|
"lost-expired": {
|
|
ID: "lost-expired",
|
|
Name: "lost-expired",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
// Failed and stopped allocs on disconnected nodes are ignored
|
|
"ignore-reconnected-failed-stopped": {
|
|
ID: "ignore-reconnected-failed-stopped",
|
|
Name: "ignore-reconnected-failed-stopped",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: unknownAllocState,
|
|
},
|
|
},
|
|
untainted: allocSet{},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{
|
|
"disconnect-running": {
|
|
ID: "disconnect-running",
|
|
Name: "disconnect-running",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
},
|
|
reconnecting: allocSet{},
|
|
ignore: allocSet{
|
|
// Unknown allocs on disconnected nodes are ignored
|
|
"ignore-unknown": {
|
|
ID: "ignore-unknown",
|
|
Name: "ignore-unknown",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
},
|
|
"ignore-reconnected-failed-stopped": {
|
|
ID: "ignore-reconnected-failed-stopped",
|
|
Name: "ignore-reconnected-failed-stopped",
|
|
ClientStatus: structs.AllocClientStatusFailed,
|
|
DesiredStatus: structs.AllocDesiredStatusStop,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: unknownAllocState,
|
|
},
|
|
},
|
|
lost: allocSet{
|
|
"lost-unknown": {
|
|
ID: "lost-unknown",
|
|
Name: "lost-unknown",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
"lost-pending": {
|
|
ID: "lost-pending",
|
|
Name: "lost-pending",
|
|
ClientStatus: structs.AllocClientStatusPending,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "disconnected",
|
|
TaskGroup: "web",
|
|
},
|
|
"lost-expired": {
|
|
ID: "lost-expired",
|
|
Name: "lost-expired",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "disco-client-reconnect",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
// Expired allocs on reconnected clients are lost
|
|
"lost-expired-reconnect": {
|
|
ID: "lost-expired-reconnect",
|
|
Name: "lost-expired-reconnect",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
},
|
|
untainted: allocSet{},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{},
|
|
ignore: allocSet{},
|
|
lost: allocSet{
|
|
"lost-expired-reconnect": {
|
|
ID: "lost-expired-reconnect",
|
|
Name: "lost-expired-reconnect",
|
|
ClientStatus: structs.AllocClientStatusUnknown,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
TaskStates: reconnectTaskState,
|
|
AllocStates: expiredAllocState,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "disco-client-running-reconnecting-and-replacement-untainted",
|
|
supportsDisconnectedClients: true,
|
|
now: time.Now(),
|
|
taintedNodes: nodes,
|
|
skipNilNodeTest: false,
|
|
all: allocSet{
|
|
"running-replacement": {
|
|
ID: "running-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
PreviousAllocation: "running-original",
|
|
},
|
|
// Running and replaced allocs on reconnected nodes are reconnecting
|
|
"running-original": {
|
|
ID: "running-original",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
untainted: allocSet{
|
|
"running-replacement": {
|
|
ID: "running-replacement",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
PreviousAllocation: "running-original",
|
|
},
|
|
},
|
|
migrate: allocSet{},
|
|
disconnecting: allocSet{},
|
|
reconnecting: allocSet{
|
|
"running-original": {
|
|
ID: "running-original",
|
|
Name: "web",
|
|
ClientStatus: structs.AllocClientStatusRunning,
|
|
DesiredStatus: structs.AllocDesiredStatusRun,
|
|
Job: testJob,
|
|
NodeID: "normal",
|
|
TaskGroup: "web",
|
|
AllocStates: unknownAllocState,
|
|
TaskStates: reconnectTaskState,
|
|
},
|
|
},
|
|
ignore: allocSet{},
|
|
lost: allocSet{},
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// With tainted nodes
|
|
untainted, migrate, lost, disconnecting, reconnecting, ignore := tc.all.filterByTainted(tc.taintedNodes, tc.supportsDisconnectedClients, tc.now)
|
|
require.Equal(t, tc.untainted, untainted, "with-nodes: %s", "untainted")
|
|
require.Equal(t, tc.migrate, migrate, "with-nodes: %s", "migrate")
|
|
require.Equal(t, tc.lost, lost, "with-nodes: %s", "lost")
|
|
require.Equal(t, tc.disconnecting, disconnecting, "with-nodes: %s", "disconnecting")
|
|
require.Equal(t, tc.reconnecting, reconnecting, "with-nodes: %s", "reconnecting")
|
|
require.Equal(t, tc.ignore, ignore, "with-nodes: %s", "ignore")
|
|
|
|
if tc.skipNilNodeTest {
|
|
return
|
|
}
|
|
|
|
// Now again with nodes nil
|
|
untainted, migrate, lost, disconnecting, reconnecting, ignore = tc.all.filterByTainted(nil, tc.supportsDisconnectedClients, tc.now)
|
|
require.Equal(t, tc.untainted, untainted, "nodes-nil: %s", "untainted")
|
|
require.Equal(t, tc.migrate, migrate, "nodes-nil: %s", "migrate")
|
|
require.Equal(t, tc.lost, lost, "nodes-nil: %s", "lost")
|
|
require.Equal(t, tc.disconnecting, disconnecting, "nodes-nil: %s", "disconnecting")
|
|
require.Equal(t, tc.reconnecting, reconnecting, "nodes-nil: %s", "reconnecting")
|
|
require.Equal(t, tc.ignore, ignore, "nodes-nil: %s", "ignore")
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestReconcile_shouldFilter(t *testing.T) {
|
|
testCases := []struct {
|
|
description string
|
|
batch bool
|
|
failed bool
|
|
desiredStatus string
|
|
clientStatus string
|
|
|
|
untainted bool
|
|
ignore bool
|
|
}{
|
|
{
|
|
description: "batch running",
|
|
batch: true,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusRun,
|
|
clientStatus: structs.AllocClientStatusRunning,
|
|
untainted: true,
|
|
ignore: false,
|
|
},
|
|
{
|
|
description: "batch stopped success",
|
|
batch: true,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusStop,
|
|
clientStatus: structs.AllocClientStatusRunning,
|
|
untainted: true,
|
|
ignore: false,
|
|
},
|
|
{
|
|
description: "batch stopped failed",
|
|
batch: true,
|
|
failed: true,
|
|
desiredStatus: structs.AllocDesiredStatusStop,
|
|
clientStatus: structs.AllocClientStatusComplete,
|
|
untainted: false,
|
|
ignore: true,
|
|
},
|
|
{
|
|
description: "batch evicted",
|
|
batch: true,
|
|
desiredStatus: structs.AllocDesiredStatusEvict,
|
|
clientStatus: structs.AllocClientStatusComplete,
|
|
untainted: false,
|
|
ignore: true,
|
|
},
|
|
{
|
|
description: "batch failed",
|
|
batch: true,
|
|
desiredStatus: structs.AllocDesiredStatusRun,
|
|
clientStatus: structs.AllocClientStatusFailed,
|
|
untainted: false,
|
|
ignore: false,
|
|
},
|
|
{
|
|
description: "service running",
|
|
batch: false,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusRun,
|
|
clientStatus: structs.AllocClientStatusRunning,
|
|
untainted: false,
|
|
ignore: false,
|
|
},
|
|
{
|
|
description: "service stopped",
|
|
batch: false,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusStop,
|
|
clientStatus: structs.AllocClientStatusComplete,
|
|
untainted: false,
|
|
ignore: true,
|
|
},
|
|
{
|
|
description: "service evicted",
|
|
batch: false,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusEvict,
|
|
clientStatus: structs.AllocClientStatusComplete,
|
|
untainted: false,
|
|
ignore: true,
|
|
},
|
|
{
|
|
description: "service client complete",
|
|
batch: false,
|
|
failed: false,
|
|
desiredStatus: structs.AllocDesiredStatusRun,
|
|
clientStatus: structs.AllocClientStatusComplete,
|
|
untainted: false,
|
|
ignore: true,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.description, func(t *testing.T) {
|
|
alloc := &structs.Allocation{
|
|
DesiredStatus: tc.desiredStatus,
|
|
TaskStates: map[string]*structs.TaskState{"task": {State: structs.TaskStateDead, Failed: tc.failed}},
|
|
ClientStatus: tc.clientStatus,
|
|
}
|
|
|
|
untainted, ignore := shouldFilter(alloc, tc.batch)
|
|
require.Equal(t, tc.untainted, untainted)
|
|
require.Equal(t, tc.ignore, ignore)
|
|
})
|
|
}
|
|
}
|