consul: avoid triggering unnecessary sync when removing workload
There are bits of logic in callers of RemoveWorkload on group/task cleanup hooks which call RemoveWorkload with the "Canary" version of the workload, in case the alloc is marked as a Canary. This logic triggers an extra sync with Consul, and also doesn't do the intended behavior - for which no special casing is necessary anyway. When the workload is marked for removal, all associated services and checks will be removed regardless of the Canary status, because the service and check IDs do not incorporate the canary-ness in the first place. The only place where canary-ness matters is when updating a workload, where we need to compute the hash of the services and checks to determine whether they have been modified, the Canary flag of which is a part of that. Fixes #10842
This commit is contained in:
parent
94913d2ad6
commit
c8260c3940
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
consul: remove ineffective edge case handling on service deregistration
|
||||||
|
```
|
|
@ -120,12 +120,12 @@ func TestAllocRunner_Restore_RunningTerminal(t *testing.T) {
|
||||||
require.Error(t, logmonProc.Signal(syscall.Signal(0)))
|
require.Error(t, logmonProc.Signal(syscall.Signal(0)))
|
||||||
|
|
||||||
// Assert consul was cleaned up:
|
// Assert consul was cleaned up:
|
||||||
// 2 removals (canary+noncanary) during prekill
|
// 1 removal during prekill
|
||||||
// 2 removals (canary+noncanary) during exited
|
// 1 removal during exited
|
||||||
// 2 removals (canary+noncanary) during stop
|
// 1 removal during stop
|
||||||
// 2 removals (canary+noncanary) group during stop
|
// 1 removal group during stop
|
||||||
consulOps := conf2.Consul.(*consul.MockConsulServiceClient).GetOps()
|
consulOps := conf2.Consul.(*consul.MockConsulServiceClient).GetOps()
|
||||||
require.Len(t, consulOps, 8)
|
require.Len(t, consulOps, 4)
|
||||||
for _, op := range consulOps {
|
for _, op := range consulOps {
|
||||||
require.Equal(t, "remove", op.Op)
|
require.Equal(t, "remove", op.Op)
|
||||||
}
|
}
|
||||||
|
|
|
@ -208,11 +208,6 @@ func (h *groupServiceHook) deregister() {
|
||||||
if len(h.services) > 0 {
|
if len(h.services) > 0 {
|
||||||
workloadServices := h.getWorkloadServices()
|
workloadServices := h.getWorkloadServices()
|
||||||
h.consulClient.RemoveWorkload(workloadServices)
|
h.consulClient.RemoveWorkload(workloadServices)
|
||||||
|
|
||||||
// Canary flag may be getting flipped when the alloc is being
|
|
||||||
// destroyed, so remove both variations of the service
|
|
||||||
workloadServices.Canary = !workloadServices.Canary
|
|
||||||
h.consulClient.RemoveWorkload(workloadServices)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,14 +54,12 @@ func TestGroupServiceHook_NoGroupServices(t *testing.T) {
|
||||||
require.NoError(t, h.PreTaskRestart())
|
require.NoError(t, h.PreTaskRestart())
|
||||||
|
|
||||||
ops := consulClient.GetOps()
|
ops := consulClient.GetOps()
|
||||||
require.Len(t, ops, 7)
|
require.Len(t, ops, 5)
|
||||||
require.Equal(t, "add", ops[0].Op) // Prerun
|
require.Equal(t, "add", ops[0].Op) // Prerun
|
||||||
require.Equal(t, "update", ops[1].Op) // Update
|
require.Equal(t, "update", ops[1].Op) // Update
|
||||||
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
|
require.Equal(t, "remove", ops[2].Op) // Postrun
|
||||||
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
|
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
|
||||||
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
|
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
|
||||||
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
|
|
||||||
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestGroupServiceHook_ShutdownDelayUpdate asserts calling group service hooks
|
// TestGroupServiceHook_ShutdownDelayUpdate asserts calling group service hooks
|
||||||
|
@ -127,14 +125,12 @@ func TestGroupServiceHook_GroupServices(t *testing.T) {
|
||||||
require.NoError(t, h.PreTaskRestart())
|
require.NoError(t, h.PreTaskRestart())
|
||||||
|
|
||||||
ops := consulClient.GetOps()
|
ops := consulClient.GetOps()
|
||||||
require.Len(t, ops, 7)
|
require.Len(t, ops, 5)
|
||||||
require.Equal(t, "add", ops[0].Op) // Prerun
|
require.Equal(t, "add", ops[0].Op) // Prerun
|
||||||
require.Equal(t, "update", ops[1].Op) // Update
|
require.Equal(t, "update", ops[1].Op) // Update
|
||||||
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
|
require.Equal(t, "remove", ops[2].Op) // Postrun
|
||||||
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
|
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
|
||||||
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
|
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
|
||||||
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
|
|
||||||
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestGroupServiceHook_Error asserts group service hooks with group
|
// TestGroupServiceHook_Error asserts group service hooks with group
|
||||||
|
@ -175,14 +171,12 @@ func TestGroupServiceHook_NoNetwork(t *testing.T) {
|
||||||
require.NoError(t, h.PreTaskRestart())
|
require.NoError(t, h.PreTaskRestart())
|
||||||
|
|
||||||
ops := consulClient.GetOps()
|
ops := consulClient.GetOps()
|
||||||
require.Len(t, ops, 7)
|
require.Len(t, ops, 5)
|
||||||
require.Equal(t, "add", ops[0].Op) // Prerun
|
require.Equal(t, "add", ops[0].Op) // Prerun
|
||||||
require.Equal(t, "update", ops[1].Op) // Update
|
require.Equal(t, "update", ops[1].Op) // Update
|
||||||
require.Equal(t, "remove", ops[2].Op) // Postrun (1st)
|
require.Equal(t, "remove", ops[2].Op) // Postrun
|
||||||
require.Equal(t, "remove", ops[3].Op) // Postrun (2nd)
|
require.Equal(t, "remove", ops[3].Op) // Restart -> preKill
|
||||||
require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st)
|
require.Equal(t, "add", ops[4].Op) // Restart -> preRun
|
||||||
require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd)
|
|
||||||
require.Equal(t, "add", ops[6].Op) // Restart -> preRun
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGroupServiceHook_getWorkloadServices(t *testing.T) {
|
func TestGroupServiceHook_getWorkloadServices(t *testing.T) {
|
||||||
|
|
|
@ -171,13 +171,10 @@ func (h *serviceHook) Exited(context.Context, *interfaces.TaskExitedRequest, *in
|
||||||
|
|
||||||
// deregister services from Consul.
|
// deregister services from Consul.
|
||||||
func (h *serviceHook) deregister() {
|
func (h *serviceHook) deregister() {
|
||||||
workloadServices := h.getWorkloadServices()
|
if len(h.services) > 0 {
|
||||||
h.consulServices.RemoveWorkload(workloadServices)
|
workloadServices := h.getWorkloadServices()
|
||||||
|
h.consulServices.RemoveWorkload(workloadServices)
|
||||||
// Canary flag may be getting flipped when the alloc is being
|
}
|
||||||
// destroyed, so remove both variations of the service
|
|
||||||
workloadServices.Canary = !workloadServices.Canary
|
|
||||||
h.consulServices.RemoveWorkload(workloadServices)
|
|
||||||
h.initialRegistration = false
|
h.initialRegistration = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,16 +39,15 @@ func TestUpdate_beforePoststart(t *testing.T) {
|
||||||
// so Update should again wait on Poststart.
|
// so Update should again wait on Poststart.
|
||||||
|
|
||||||
require.NoError(t, hook.Exited(context.Background(), &interfaces.TaskExitedRequest{}, &interfaces.TaskExitedResponse{}))
|
require.NoError(t, hook.Exited(context.Background(), &interfaces.TaskExitedRequest{}, &interfaces.TaskExitedResponse{}))
|
||||||
|
require.Len(t, c.GetOps(), 3)
|
||||||
|
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
||||||
|
require.Len(t, c.GetOps(), 3)
|
||||||
|
require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{}))
|
||||||
require.Len(t, c.GetOps(), 4)
|
require.Len(t, c.GetOps(), 4)
|
||||||
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
||||||
require.Len(t, c.GetOps(), 4)
|
|
||||||
require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{}))
|
|
||||||
require.Len(t, c.GetOps(), 5)
|
require.Len(t, c.GetOps(), 5)
|
||||||
|
require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{}))
|
||||||
|
require.Len(t, c.GetOps(), 6)
|
||||||
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
||||||
require.Len(t, c.GetOps(), 6)
|
require.Len(t, c.GetOps(), 6)
|
||||||
require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{}))
|
|
||||||
require.Len(t, c.GetOps(), 8)
|
|
||||||
require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{}))
|
|
||||||
require.Len(t, c.GetOps(), 8)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -957,17 +957,16 @@ func TestTaskRunner_ShutdownDelay(t *testing.T) {
|
||||||
assert.NoError(t, tr.Kill(context.Background(), structs.NewTaskEvent("test")))
|
assert.NoError(t, tr.Kill(context.Background(), structs.NewTaskEvent("test")))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait for *2* deregistration calls (due to needing to remove both
|
// Wait for *1* de-registration calls (all [non-]canary variants removed).
|
||||||
// canary tag variants)
|
|
||||||
WAIT:
|
WAIT:
|
||||||
for {
|
for {
|
||||||
ops := mockConsul.GetOps()
|
ops := mockConsul.GetOps()
|
||||||
switch n := len(ops); n {
|
switch n := len(ops); n {
|
||||||
case 1, 2:
|
case 1:
|
||||||
// Waiting for both deregistration calls
|
// Waiting for single de-registration call.
|
||||||
case 3:
|
case 2:
|
||||||
require.Equalf(t, "remove", ops[1].Op, "expected deregistration but found: %#v", ops[1])
|
require.Equalf(t, "remove", ops[1].Op, "expected deregistration but found: %#v", ops[1])
|
||||||
require.Equalf(t, "remove", ops[2].Op, "expected deregistration but found: %#v", ops[2])
|
|
||||||
break WAIT
|
break WAIT
|
||||||
default:
|
default:
|
||||||
// ?!
|
// ?!
|
||||||
|
@ -2401,25 +2400,22 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) {
|
||||||
|
|
||||||
consul := conf.Consul.(*consulapi.MockConsulServiceClient)
|
consul := conf.Consul.(*consulapi.MockConsulServiceClient)
|
||||||
consulOps := consul.GetOps()
|
consulOps := consul.GetOps()
|
||||||
require.Len(t, consulOps, 8)
|
require.Len(t, consulOps, 5)
|
||||||
|
|
||||||
// Initial add
|
// Initial add
|
||||||
require.Equal(t, "add", consulOps[0].Op)
|
require.Equal(t, "add", consulOps[0].Op)
|
||||||
|
|
||||||
// Removing canary and non-canary entries on first exit
|
// Removing entries on first exit
|
||||||
require.Equal(t, "remove", consulOps[1].Op)
|
require.Equal(t, "remove", consulOps[1].Op)
|
||||||
require.Equal(t, "remove", consulOps[2].Op)
|
|
||||||
|
|
||||||
// Second add on retry
|
// Second add on retry
|
||||||
require.Equal(t, "add", consulOps[3].Op)
|
require.Equal(t, "add", consulOps[2].Op)
|
||||||
|
|
||||||
// Removing canary and non-canary entries on retry
|
// Removing entries on retry
|
||||||
|
require.Equal(t, "remove", consulOps[3].Op)
|
||||||
|
|
||||||
|
// Removing entries on stop
|
||||||
require.Equal(t, "remove", consulOps[4].Op)
|
require.Equal(t, "remove", consulOps[4].Op)
|
||||||
require.Equal(t, "remove", consulOps[5].Op)
|
|
||||||
|
|
||||||
// Removing canary and non-canary entries on stop
|
|
||||||
require.Equal(t, "remove", consulOps[6].Op)
|
|
||||||
require.Equal(t, "remove", consulOps[7].Op)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// testWaitForTaskToStart waits for the task to be running or fails the test
|
// testWaitForTaskToStart waits for the task to be running or fails the test
|
||||||
|
|
Loading…
Reference in New Issue