reconciler: Handle canaries when client disconnects (#12539)
* plan_apply: Allow node updates in disconnected node plans * plan: Keep the job when persisting unknown allocs * reconciler: stop unknown allocs when stopping all * reconcile_util: reorder filtering to handle canaries; skip rescheduling unknown * heartbeat: Fix bug in node heartbeating
This commit is contained in:
parent
2ad9f6bc5f
commit
5e309f3f33
|
@ -167,7 +167,6 @@ func (h *nodeHeartbeater) invalidateHeartbeat(id string) {
|
|||
if canDisconnect && hasPendingReconnects {
|
||||
req.Status = structs.NodeStatusDisconnected
|
||||
}
|
||||
|
||||
var resp structs.NodeUpdateResponse
|
||||
if err := h.staticEndpoints.Node.UpdateStatus(&req, &resp); err != nil {
|
||||
h.logger.Error("update node status failed", "error", err)
|
||||
|
@ -181,8 +180,8 @@ func (h *nodeHeartbeater) disconnectState(id string) (bool, bool) {
|
|||
return false, false
|
||||
}
|
||||
|
||||
// Exit if this is the node already in a state other than ready.
|
||||
if node.Status != structs.NodeStatusReady {
|
||||
// Exit if the node is already down or just initializing.
|
||||
if node.Status == structs.NodeStatusDown || node.Status == structs.NodeStatusInit {
|
||||
return false, false
|
||||
}
|
||||
|
||||
|
|
|
@ -573,7 +573,8 @@ func nodeStatusTransitionRequiresEval(newStatus, oldStatus string) bool {
|
|||
initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
|
||||
terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
|
||||
disconnectedToOther := oldStatus == structs.NodeStatusDisconnected && newStatus != structs.NodeStatusDisconnected
|
||||
return initToReady || terminalToReady || disconnectedToOther
|
||||
otherToDisconnected := oldStatus != structs.NodeStatusDisconnected && newStatus == structs.NodeStatusDisconnected
|
||||
return initToReady || terminalToReady || disconnectedToOther || otherToDisconnected
|
||||
}
|
||||
|
||||
// UpdateDrain is used to update the drain mode of a client node
|
||||
|
|
|
@ -705,10 +705,6 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri
|
|||
// The plan is only valid for disconnected nodes if it only contains
|
||||
// updates to mark allocations as unknown.
|
||||
func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool {
|
||||
if len(plan.NodeUpdate[nodeID]) != 0 || len(plan.NodePreemptions[nodeID]) != 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, alloc := range plan.NodeAllocation[nodeID] {
|
||||
if alloc.ClientStatus != structs.AllocClientStatusUnknown {
|
||||
return false
|
||||
|
|
|
@ -11305,8 +11305,6 @@ func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string)
|
|||
|
||||
// AppendUnknownAlloc marks an allocation as unknown.
|
||||
func (p *Plan) AppendUnknownAlloc(alloc *Allocation) {
|
||||
// Strip the job as it's set once on the ApplyPlanResultRequest.
|
||||
alloc.Job = nil
|
||||
// Strip the resources as they can be rebuilt.
|
||||
alloc.Resources = nil
|
||||
|
||||
|
|
|
@ -344,12 +344,13 @@ func (a *allocReconciler) handleStop(m allocMatrix) {
|
|||
// filterAndStopAll stops all allocations in an allocSet. This is useful in when
|
||||
// stopping an entire job or task group.
|
||||
func (a *allocReconciler) filterAndStopAll(set allocSet) uint64 {
|
||||
untainted, migrate, lost, disconnecting, reconnecting, _ := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
|
||||
untainted, migrate, lost, disconnecting, reconnecting, ignore := set.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
|
||||
a.markStop(untainted, "", allocNotNeeded)
|
||||
a.markStop(migrate, "", allocNotNeeded)
|
||||
a.markStop(lost, structs.AllocClientStatusLost, allocLost)
|
||||
a.markStop(disconnecting, "", allocNotNeeded)
|
||||
a.markStop(reconnecting, "", allocNotNeeded)
|
||||
a.markStop(ignore.filterByClientStatus(structs.AllocClientStatusUnknown), "", allocNotNeeded)
|
||||
return uint64(len(set))
|
||||
}
|
||||
|
||||
|
@ -462,7 +463,6 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
|
|||
if isCanarying {
|
||||
untainted = untainted.difference(canaries)
|
||||
}
|
||||
|
||||
requiresCanaries := a.requiresCanaries(tg, dstate, destructive, canaries)
|
||||
if requiresCanaries {
|
||||
a.computeCanaries(tg, dstate, destructive, canaries, desiredChanges, nameIndex)
|
||||
|
@ -619,6 +619,9 @@ func (a *allocReconciler) cancelUnneededCanaries(original allocSet, desiredChang
|
|||
|
||||
canaries = all.fromKeys(canaryIDs)
|
||||
untainted, migrate, lost, _, _, _ := canaries.filterByTainted(a.taintedNodes, a.supportsDisconnectedClients, a.now)
|
||||
// We don't add these stops to desiredChanges because the deployment is
|
||||
// still active. DesiredChanges is used to report deployment progress/final
|
||||
// state. These transient failures aren't meaningful.
|
||||
a.markStop(migrate, "", allocMigrating)
|
||||
a.markStop(lost, structs.AllocClientStatusLost, allocLost)
|
||||
|
||||
|
|
|
@ -5549,6 +5549,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
|
|||
alloc.ClientStatus = tc.disconnectedAllocStatus
|
||||
// Set the node id on all the disconnected allocs to the node under test.
|
||||
alloc.NodeID = testNode.ID
|
||||
alloc.NodeName = "disconnected"
|
||||
|
||||
alloc.AllocStates = []*structs.AllocState{{
|
||||
Field: structs.AllocStateFieldClientStatus,
|
||||
|
@ -5631,3 +5632,366 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that a client disconnect while a canary is in progress generates the result.
|
||||
func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
|
||||
|
||||
type testCase struct {
|
||||
name string
|
||||
nodes []string
|
||||
deploymentState *structs.DeploymentState
|
||||
deployedAllocs map[*structs.Node][]*structs.Allocation
|
||||
canaryAllocs map[*structs.Node][]*structs.Allocation
|
||||
expectedResult *resultExpectation
|
||||
}
|
||||
|
||||
running := structs.AllocClientStatusRunning
|
||||
complete := structs.AllocClientStatusComplete
|
||||
unknown := structs.AllocClientStatusUnknown
|
||||
pending := structs.AllocClientStatusPending
|
||||
run := structs.AllocDesiredStatusRun
|
||||
stop := structs.AllocDesiredStatusStop
|
||||
|
||||
maxClientDisconnect := 10 * time.Minute
|
||||
|
||||
readyNode := mock.Node()
|
||||
readyNode.Name = "ready-" + readyNode.ID
|
||||
readyNode.Status = structs.NodeStatusReady
|
||||
|
||||
disconnectedNode := mock.Node()
|
||||
disconnectedNode.Name = "disconnected-" + disconnectedNode.ID
|
||||
disconnectedNode.Status = structs.NodeStatusDisconnected
|
||||
|
||||
// Job with allocations and max_client_disconnect
|
||||
job := mock.Job()
|
||||
|
||||
updatedJob := job.Copy()
|
||||
updatedJob.Version = updatedJob.Version + 1
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
name: "3-placed-1-disconnect",
|
||||
deploymentState: &structs.DeploymentState{
|
||||
AutoRevert: false,
|
||||
AutoPromote: false,
|
||||
Promoted: false,
|
||||
ProgressDeadline: 5 * time.Minute,
|
||||
RequireProgressBy: time.Now().Add(5 * time.Minute),
|
||||
PlacedCanaries: []string{},
|
||||
DesiredCanaries: 1,
|
||||
DesiredTotal: 6,
|
||||
PlacedAllocs: 3,
|
||||
HealthyAllocs: 2,
|
||||
UnhealthyAllocs: 0,
|
||||
},
|
||||
deployedAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// Ignored
|
||||
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
|
||||
// destructive, but discarded because canarying
|
||||
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
canaryAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// Ignored
|
||||
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
|
||||
// Ignored
|
||||
{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[1]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
expectedResult: &resultExpectation{
|
||||
createDeployment: nil,
|
||||
deploymentUpdates: nil,
|
||||
place: 3,
|
||||
destructive: 0,
|
||||
stop: 0,
|
||||
inplace: 0,
|
||||
attributeUpdates: 0,
|
||||
disconnectUpdates: 3,
|
||||
reconnectUpdates: 0,
|
||||
desiredTGUpdates: map[string]*structs.DesiredUpdates{
|
||||
updatedJob.TaskGroups[0].Name: {
|
||||
Place: 3,
|
||||
Canary: 0,
|
||||
Ignore: 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ignore-unknown",
|
||||
deploymentState: &structs.DeploymentState{
|
||||
AutoRevert: false,
|
||||
AutoPromote: false,
|
||||
Promoted: false,
|
||||
ProgressDeadline: 5 * time.Minute,
|
||||
RequireProgressBy: time.Now().Add(5 * time.Minute),
|
||||
PlacedCanaries: []string{},
|
||||
DesiredCanaries: 1,
|
||||
DesiredTotal: 6,
|
||||
PlacedAllocs: 3,
|
||||
HealthyAllocs: 2,
|
||||
UnhealthyAllocs: 0,
|
||||
},
|
||||
deployedAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// Ignored
|
||||
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: stop},
|
||||
// destructive, but discarded because canarying
|
||||
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
canaryAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// Ignored
|
||||
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
|
||||
// Ignored
|
||||
{Name: "my-job.web[2]", ClientStatus: pending, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// Ignored
|
||||
{Name: "my-job.web[1]", ClientStatus: unknown, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
expectedResult: &resultExpectation{
|
||||
createDeployment: nil,
|
||||
deploymentUpdates: nil,
|
||||
place: 2,
|
||||
destructive: 0,
|
||||
stop: 0,
|
||||
inplace: 0,
|
||||
attributeUpdates: 0,
|
||||
disconnectUpdates: 2,
|
||||
reconnectUpdates: 0,
|
||||
desiredTGUpdates: map[string]*structs.DesiredUpdates{
|
||||
updatedJob.TaskGroups[0].Name: {
|
||||
Place: 2,
|
||||
Canary: 0,
|
||||
Ignore: 4,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "4-placed-2-pending-lost",
|
||||
deploymentState: &structs.DeploymentState{
|
||||
AutoRevert: false,
|
||||
AutoPromote: false,
|
||||
Promoted: false,
|
||||
ProgressDeadline: 5 * time.Minute,
|
||||
RequireProgressBy: time.Now().Add(5 * time.Minute),
|
||||
PlacedCanaries: []string{},
|
||||
DesiredCanaries: 2,
|
||||
DesiredTotal: 6,
|
||||
PlacedAllocs: 4,
|
||||
HealthyAllocs: 2,
|
||||
UnhealthyAllocs: 0,
|
||||
},
|
||||
deployedAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[0]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[2]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// destructive, but discarded because canarying
|
||||
{Name: "my-job.web[4]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// filtered as terminal
|
||||
{Name: "my-job.web[1]", ClientStatus: complete, DesiredStatus: stop},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[3]", ClientStatus: running, DesiredStatus: run},
|
||||
// Gets a placement, and a disconnect update
|
||||
{Name: "my-job.web[5]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
canaryAllocs: map[*structs.Node][]*structs.Allocation{
|
||||
readyNode: {
|
||||
// Ignored
|
||||
{Name: "my-job.web[0]", ClientStatus: running, DesiredStatus: run},
|
||||
// Ignored
|
||||
{Name: "my-job.web[2]", ClientStatus: running, DesiredStatus: run},
|
||||
},
|
||||
disconnectedNode: {
|
||||
// Stop/Lost because pending
|
||||
{Name: "my-job.web[1]", ClientStatus: pending, DesiredStatus: run},
|
||||
// Stop/Lost because pending
|
||||
{Name: "my-job.web[3]", ClientStatus: pending, DesiredStatus: run},
|
||||
},
|
||||
},
|
||||
expectedResult: &resultExpectation{
|
||||
createDeployment: nil,
|
||||
deploymentUpdates: nil,
|
||||
place: 2,
|
||||
destructive: 0,
|
||||
stop: 2,
|
||||
inplace: 0,
|
||||
attributeUpdates: 0,
|
||||
disconnectUpdates: 2,
|
||||
reconnectUpdates: 0,
|
||||
desiredTGUpdates: map[string]*structs.DesiredUpdates{
|
||||
updatedJob.TaskGroups[0].Name: {
|
||||
Place: 2,
|
||||
Canary: 0,
|
||||
Ignore: 3,
|
||||
// The 2 stops in this test are transient failures, but
|
||||
// the deployment can still progress. We don't include
|
||||
// them in the stop count since DesiredTGUpdates is used
|
||||
// to report deployment progress or final deployment state.
|
||||
Stop: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set the count dynamically to the number from the original deployment.
|
||||
job.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
|
||||
job.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
|
||||
job.TaskGroups[0].Update = &structs.UpdateStrategy{
|
||||
MaxParallel: 1,
|
||||
Canary: tc.deploymentState.DesiredCanaries,
|
||||
MinHealthyTime: 3 * time.Second,
|
||||
HealthyDeadline: 20 * time.Second,
|
||||
AutoRevert: true,
|
||||
AutoPromote: true,
|
||||
}
|
||||
|
||||
updatedJob.TaskGroups[0].Count = len(tc.deployedAllocs[readyNode]) + len(tc.deployedAllocs[disconnectedNode])
|
||||
updatedJob.TaskGroups[0].MaxClientDisconnect = &maxClientDisconnect
|
||||
updatedJob.TaskGroups[0].Update = &structs.UpdateStrategy{
|
||||
MaxParallel: 1,
|
||||
Canary: tc.deploymentState.DesiredCanaries,
|
||||
MinHealthyTime: 3 * time.Second,
|
||||
HealthyDeadline: 20 * time.Second,
|
||||
AutoRevert: true,
|
||||
AutoPromote: true,
|
||||
}
|
||||
|
||||
// Populate Alloc IDS, Node IDs, Job on deployed allocs
|
||||
allocsConfigured := 0
|
||||
for node, allocs := range tc.deployedAllocs {
|
||||
for _, alloc := range allocs {
|
||||
alloc.ID = uuid.Generate()
|
||||
alloc.NodeID = node.ID
|
||||
alloc.NodeName = node.Name
|
||||
alloc.JobID = job.ID
|
||||
alloc.Job = job
|
||||
alloc.TaskGroup = job.TaskGroups[0].Name
|
||||
allocsConfigured++
|
||||
}
|
||||
}
|
||||
|
||||
require.Equal(t, tc.deploymentState.DesiredTotal, allocsConfigured, "invalid alloc configuration: expect %d got %d", tc.deploymentState.DesiredTotal, allocsConfigured)
|
||||
|
||||
// Populate Alloc IDS, Node IDs, Job on canaries
|
||||
canariesConfigured := 0
|
||||
handled := make(map[string]allocUpdateType)
|
||||
for node, allocs := range tc.canaryAllocs {
|
||||
for _, alloc := range allocs {
|
||||
alloc.ID = uuid.Generate()
|
||||
alloc.NodeID = node.ID
|
||||
alloc.NodeName = node.Name
|
||||
alloc.JobID = updatedJob.ID
|
||||
alloc.Job = updatedJob
|
||||
alloc.TaskGroup = updatedJob.TaskGroups[0].Name
|
||||
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Canary: true,
|
||||
}
|
||||
if alloc.ClientStatus == structs.AllocClientStatusRunning {
|
||||
alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
|
||||
}
|
||||
tc.deploymentState.PlacedCanaries = append(tc.deploymentState.PlacedCanaries, alloc.ID)
|
||||
handled[alloc.ID] = allocUpdateFnIgnore
|
||||
canariesConfigured++
|
||||
}
|
||||
}
|
||||
|
||||
// Validate tc.canaryAllocs against tc.deploymentState
|
||||
require.Equal(t, tc.deploymentState.PlacedAllocs, canariesConfigured, "invalid canary configuration: expect %d got %d", tc.deploymentState.PlacedAllocs, canariesConfigured)
|
||||
|
||||
deployment := structs.NewDeployment(updatedJob, 50)
|
||||
deployment.TaskGroups[updatedJob.TaskGroups[0].Name] = tc.deploymentState
|
||||
|
||||
// Build a map of tainted nodes that contains the last canary
|
||||
tainted := make(map[string]*structs.Node, 1)
|
||||
tainted[disconnectedNode.ID] = disconnectedNode
|
||||
|
||||
allocs := make([]*structs.Allocation, 0)
|
||||
|
||||
allocs = append(allocs, tc.deployedAllocs[readyNode]...)
|
||||
allocs = append(allocs, tc.deployedAllocs[disconnectedNode]...)
|
||||
allocs = append(allocs, tc.canaryAllocs[readyNode]...)
|
||||
allocs = append(allocs, tc.canaryAllocs[disconnectedNode]...)
|
||||
|
||||
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
|
||||
reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, updatedJob.ID, updatedJob,
|
||||
deployment, allocs, tainted, "", 50, true)
|
||||
result := reconciler.Compute()
|
||||
|
||||
// Assert the correct results
|
||||
assertResults(t, result, tc.expectedResult)
|
||||
|
||||
// Validate that placements are either for placed canaries for the
|
||||
// updated job, or for disconnected allocs for the original job
|
||||
// and that they have a disconnect update.
|
||||
for _, placeResult := range result.place {
|
||||
found := false
|
||||
require.NotNil(t, placeResult.previousAlloc)
|
||||
for _, deployed := range tc.deployedAllocs[disconnectedNode] {
|
||||
if deployed.ID == placeResult.previousAlloc.ID {
|
||||
found = true
|
||||
require.Equal(t, job.Version, placeResult.previousAlloc.Job.Version)
|
||||
require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
|
||||
_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
|
||||
require.True(t, exists)
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, canary := range tc.canaryAllocs[disconnectedNode] {
|
||||
if canary.ID == placeResult.previousAlloc.ID {
|
||||
found = true
|
||||
require.Equal(t, updatedJob.Version, placeResult.previousAlloc.Job.Version)
|
||||
require.Equal(t, disconnectedNode.ID, placeResult.previousAlloc.NodeID)
|
||||
_, exists := result.disconnectUpdates[placeResult.previousAlloc.ID]
|
||||
require.True(t, exists)
|
||||
break
|
||||
}
|
||||
}
|
||||
require.True(t, found)
|
||||
}
|
||||
|
||||
// Validate that stops are for pending disconnects
|
||||
for _, stopResult := range result.stop {
|
||||
require.Equal(t, pending, stopResult.alloc.ClientStatus)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -225,7 +225,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
|
|||
ignore = make(map[string]*structs.Allocation)
|
||||
|
||||
for _, alloc := range a {
|
||||
|
||||
// make sure we don't apply any reconnect logic to task groups
|
||||
// without max_client_disconnect
|
||||
supportsDisconnectedClients := alloc.SupportsDisconnectedClients(serverSupportsDisconnectedClients)
|
||||
|
@ -251,6 +250,40 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
|
|||
continue
|
||||
}
|
||||
|
||||
taintedNode, nodeIsTainted := taintedNodes[alloc.NodeID]
|
||||
if taintedNode != nil {
|
||||
// Group disconnecting/reconnecting
|
||||
switch taintedNode.Status {
|
||||
case structs.NodeStatusDisconnected:
|
||||
if supportsDisconnectedClients {
|
||||
// Filter running allocs on a node that is disconnected to be marked as unknown.
|
||||
if alloc.ClientStatus == structs.AllocClientStatusRunning {
|
||||
disconnecting[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
// Filter pending allocs on a node that is disconnected to be marked as lost.
|
||||
if alloc.ClientStatus == structs.AllocClientStatusPending {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
case structs.NodeStatusReady:
|
||||
// Filter reconnecting allocs on a node that is now connected.
|
||||
if reconnected {
|
||||
if expired {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
reconnecting[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Terminal allocs, if not reconnected, are always untainted as they
|
||||
// should never be migrated.
|
||||
if alloc.TerminalStatus() && !reconnected {
|
||||
|
@ -287,8 +320,7 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
|
|||
continue
|
||||
}
|
||||
|
||||
taintedNode, ok := taintedNodes[alloc.NodeID]
|
||||
if !ok {
|
||||
if !nodeIsTainted {
|
||||
// Filter allocs on a node that is now re-connected to be resumed.
|
||||
if reconnected {
|
||||
if expired {
|
||||
|
@ -304,39 +336,6 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
|
|||
continue
|
||||
}
|
||||
|
||||
if taintedNode != nil {
|
||||
// Group disconnecting/reconnecting
|
||||
switch taintedNode.Status {
|
||||
case structs.NodeStatusDisconnected:
|
||||
if supportsDisconnectedClients {
|
||||
// Filter running allocs on a node that is disconnected to be marked as unknown.
|
||||
if alloc.ClientStatus == structs.AllocClientStatusRunning {
|
||||
disconnecting[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
// Filter pending allocs on a node that is disconnected to be marked as lost.
|
||||
if alloc.ClientStatus == structs.AllocClientStatusPending {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
case structs.NodeStatusReady:
|
||||
// Filter reconnecting allocs with replacements on a node that is now connected.
|
||||
if reconnected {
|
||||
if expired {
|
||||
lost[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
reconnecting[alloc.ID] = alloc
|
||||
continue
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Allocs on GC'd (nil) or lost nodes are Lost
|
||||
if taintedNode == nil || taintedNode.TerminalStatus() {
|
||||
lost[alloc.ID] = alloc
|
||||
|
@ -361,6 +360,12 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time
|
|||
// When filtering disconnected sets, the untainted set is never populated.
|
||||
// It has no purpose in that context.
|
||||
for _, alloc := range a {
|
||||
// Ignore disconnecting allocs that are already unknown. This can happen
|
||||
// in the case of canaries that are interrupted by a disconnect.
|
||||
if isDisconnecting && alloc.ClientStatus == structs.AllocClientStatusUnknown {
|
||||
continue
|
||||
}
|
||||
|
||||
var eligibleNow, eligibleLater bool
|
||||
var rescheduleTime time.Time
|
||||
|
||||
|
@ -557,6 +562,18 @@ func (a allocSet) delayByMaxClientDisconnect(now time.Time) (later []*delayedRes
|
|||
return
|
||||
}
|
||||
|
||||
// filterByClientStatus returns allocs from the set with the specified client status.
|
||||
func (a allocSet) filterByClientStatus(clientStatus string) allocSet {
|
||||
allocs := make(allocSet)
|
||||
for _, alloc := range a {
|
||||
if alloc.ClientStatus == clientStatus {
|
||||
allocs[alloc.ID] = alloc
|
||||
}
|
||||
}
|
||||
|
||||
return allocs
|
||||
}
|
||||
|
||||
// allocNameIndex is used to select allocation names for placement or removal
|
||||
// given an existing set of placed allocations.
|
||||
type allocNameIndex struct {
|
||||
|
|
|
@ -266,23 +266,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
all: allocSet{
|
||||
// Non-terminal allocs on disconnected nodes w/o max-disconnect are lost
|
||||
"lost-running": {
|
||||
ID: "lost-running",
|
||||
Name: "lost-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJobNoMaxDisconnect,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
// Unknown allocs on disconnected nodes w/o max-disconnect are lost
|
||||
"lost-unknown": {
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
ID: "lost-running",
|
||||
Name: "lost-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJobNoMaxDisconnect,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
},
|
||||
},
|
||||
untainted: allocSet{},
|
||||
|
@ -292,22 +282,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ignore: allocSet{},
|
||||
lost: allocSet{
|
||||
"lost-running": {
|
||||
ID: "lost-running",
|
||||
Name: "lost-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJobNoMaxDisconnect,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
"lost-unknown": {
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
ID: "lost-running",
|
||||
Name: "lost-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJobNoMaxDisconnect,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -324,6 +305,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "running-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -349,6 +331,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "running-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -384,14 +367,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
// Node.UpdateStatus has already handled syncing client state so this
|
||||
// should be a noop.
|
||||
"reconnecting-running-no-replacement": {
|
||||
ID: "reconnecting-running-no-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "reconnecting-running-no-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
},
|
||||
untainted: allocSet{},
|
||||
|
@ -399,14 +383,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
disconnecting: allocSet{},
|
||||
reconnecting: allocSet{
|
||||
"reconnecting-running-no-replacement": {
|
||||
ID: "reconnecting-running-no-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "reconnecting-running-no-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
},
|
||||
ignore: allocSet{},
|
||||
|
@ -421,43 +406,47 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
all: allocSet{
|
||||
// Allocs on reconnected nodes that are complete are untainted
|
||||
"untainted-reconnect-complete": {
|
||||
ID: "untainted-reconnect-complete",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "untainted-reconnect-complete",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
// Failed allocs on reconnected nodes are in reconnecting so that
|
||||
// they be marked with desired status stop at the server.
|
||||
"reconnecting-failed": {
|
||||
ID: "reconnecting-failed",
|
||||
Name: "reconnecting-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "reconnecting-failed",
|
||||
Name: "reconnecting-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
// Lost allocs on reconnected nodes don't get restarted
|
||||
"untainted-reconnect-lost": {
|
||||
ID: "untainted-reconnect-lost",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "untainted-reconnect-lost",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
// Replacement allocs that are complete are untainted
|
||||
"untainted-reconnect-complete-replacement": {
|
||||
ID: "untainted-reconnect-complete-replacement",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -469,6 +458,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "untainted-reconnect-failed-replacement",
|
||||
Name: "untainted-reconnect-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -480,6 +470,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "untainted-reconnect-lost-replacement",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -489,29 +480,32 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
},
|
||||
untainted: allocSet{
|
||||
"untainted-reconnect-complete": {
|
||||
ID: "untainted-reconnect-complete",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "untainted-reconnect-complete",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
"untainted-reconnect-lost": {
|
||||
ID: "untainted-reconnect-lost",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "untainted-reconnect-lost",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
"untainted-reconnect-complete-replacement": {
|
||||
ID: "untainted-reconnect-complete-replacement",
|
||||
Name: "untainted-reconnect-complete",
|
||||
ClientStatus: structs.AllocClientStatusComplete,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -522,6 +516,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "untainted-reconnect-failed-replacement",
|
||||
Name: "untainted-reconnect-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -532,6 +527,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "untainted-reconnect-lost-replacement",
|
||||
Name: "untainted-reconnect-lost",
|
||||
ClientStatus: structs.AllocClientStatusLost,
|
||||
DesiredStatus: structs.AllocDesiredStatusStop,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -543,14 +539,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
disconnecting: allocSet{},
|
||||
reconnecting: allocSet{
|
||||
"reconnecting-failed": {
|
||||
ID: "reconnecting-failed",
|
||||
Name: "reconnecting-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "reconnecting-failed",
|
||||
Name: "reconnecting-failed",
|
||||
ClientStatus: structs.AllocClientStatusFailed,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
},
|
||||
ignore: allocSet{},
|
||||
|
@ -565,12 +562,13 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
all: allocSet{
|
||||
// Non-terminal allocs on disconnected nodes are disconnecting
|
||||
"disconnect-running": {
|
||||
ID: "disconnect-running",
|
||||
Name: "disconnect-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
ID: "disconnect-running",
|
||||
Name: "disconnect-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
// Unknown allocs on disconnected nodes are ignored
|
||||
"ignore-unknown": {
|
||||
|
@ -585,13 +583,37 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
},
|
||||
// Unknown allocs on disconnected nodes are lost when expired
|
||||
"lost-unknown": {
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: expiredAllocState,
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
// Pending allocs on disconnected nodes are lost
|
||||
"lost-pending": {
|
||||
ID: "lost-pending",
|
||||
Name: "lost-pending",
|
||||
ClientStatus: structs.AllocClientStatusPending,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
// Expired allocs on reconnected clients are lost
|
||||
// Pending allocs on disconnected nodes are lost
|
||||
"lost-expired": {
|
||||
ID: "lost-expired",
|
||||
Name: "lost-expired",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
TaskStates: reconnectTaskState,
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
// Failed and stopped allocs on disconnected nodes are ignored
|
||||
"ignore-reconnected-failed-stopped": {
|
||||
|
@ -603,19 +625,20 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
TaskStates: reconnectTaskState,
|
||||
AllocStates: unknownAllocState, // TODO really?
|
||||
AllocStates: unknownAllocState,
|
||||
},
|
||||
},
|
||||
untainted: allocSet{},
|
||||
migrate: allocSet{},
|
||||
disconnecting: allocSet{
|
||||
"disconnect-running": {
|
||||
ID: "disconnect-running",
|
||||
Name: "disconnect-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
ID: "disconnect-running",
|
||||
Name: "disconnect-running",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
},
|
||||
reconnecting: allocSet{},
|
||||
|
@ -645,13 +668,73 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
},
|
||||
lost: allocSet{
|
||||
"lost-unknown": {
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: expiredAllocState,
|
||||
ID: "lost-unknown",
|
||||
Name: "lost-unknown",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
"lost-pending": {
|
||||
ID: "lost-pending",
|
||||
Name: "lost-pending",
|
||||
ClientStatus: structs.AllocClientStatusPending,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "disconnected",
|
||||
TaskGroup: "web",
|
||||
},
|
||||
"lost-expired": {
|
||||
ID: "lost-expired",
|
||||
Name: "lost-expired",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
TaskStates: reconnectTaskState,
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "disco-client-reconnect",
|
||||
supportsDisconnectedClients: true,
|
||||
now: time.Now(),
|
||||
taintedNodes: nodes,
|
||||
skipNilNodeTest: false,
|
||||
all: allocSet{
|
||||
// Expired allocs on reconnected clients are lost
|
||||
"lost-expired-reconnect": {
|
||||
ID: "lost-expired-reconnect",
|
||||
Name: "lost-expired-reconnect",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
TaskStates: reconnectTaskState,
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
},
|
||||
untainted: allocSet{},
|
||||
migrate: allocSet{},
|
||||
disconnecting: allocSet{},
|
||||
reconnecting: allocSet{},
|
||||
ignore: allocSet{},
|
||||
lost: allocSet{
|
||||
"lost-expired-reconnect": {
|
||||
ID: "lost-expired-reconnect",
|
||||
Name: "lost-expired-reconnect",
|
||||
ClientStatus: structs.AllocClientStatusUnknown,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
TaskStates: reconnectTaskState,
|
||||
AllocStates: expiredAllocState,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -666,6 +749,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "running-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -673,14 +757,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
},
|
||||
// Running and replaced allocs on reconnected nodes are reconnecting
|
||||
"running-original": {
|
||||
ID: "running-original",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "running-original",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
},
|
||||
untainted: allocSet{
|
||||
|
@ -688,6 +773,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
ID: "running-replacement",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
|
@ -698,14 +784,15 @@ func TestAllocSet_filterByTainted(t *testing.T) {
|
|||
disconnecting: allocSet{},
|
||||
reconnecting: allocSet{
|
||||
"running-original": {
|
||||
ID: "running-original",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
ID: "running-original",
|
||||
Name: "web",
|
||||
ClientStatus: structs.AllocClientStatusRunning,
|
||||
DesiredStatus: structs.AllocDesiredStatusRun,
|
||||
Job: testJob,
|
||||
NodeID: "normal",
|
||||
TaskGroup: "web",
|
||||
AllocStates: unknownAllocState,
|
||||
TaskStates: reconnectTaskState,
|
||||
},
|
||||
},
|
||||
ignore: allocSet{},
|
||||
|
|
Loading…
Reference in New Issue