diff --git a/.changelog/10842.txt b/.changelog/10842.txt new file mode 100644 index 000000000..98a4a0a7d --- /dev/null +++ b/.changelog/10842.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: remove ineffective edge case handling on service deregistration +``` diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go index 7ec4dea0b..213016254 100644 --- a/client/allocrunner/alloc_runner_unix_test.go +++ b/client/allocrunner/alloc_runner_unix_test.go @@ -120,12 +120,12 @@ func TestAllocRunner_Restore_RunningTerminal(t *testing.T) { require.Error(t, logmonProc.Signal(syscall.Signal(0))) // Assert consul was cleaned up: - // 2 removals (canary+noncanary) during prekill - // 2 removals (canary+noncanary) during exited - // 2 removals (canary+noncanary) during stop - // 2 removals (canary+noncanary) group during stop + // 1 removal during prekill + // 1 removal during exited + // 1 removal during stop + // 1 removal group during stop consulOps := conf2.Consul.(*consul.MockConsulServiceClient).GetOps() - require.Len(t, consulOps, 8) + require.Len(t, consulOps, 4) for _, op := range consulOps { require.Equal(t, "remove", op.Op) } diff --git a/client/allocrunner/groupservice_hook.go b/client/allocrunner/groupservice_hook.go index 065c94347..72f0e9fd9 100644 --- a/client/allocrunner/groupservice_hook.go +++ b/client/allocrunner/groupservice_hook.go @@ -208,11 +208,6 @@ func (h *groupServiceHook) deregister() { if len(h.services) > 0 { workloadServices := h.getWorkloadServices() h.consulClient.RemoveWorkload(workloadServices) - - // Canary flag may be getting flipped when the alloc is being - // destroyed, so remove both variations of the service - workloadServices.Canary = !workloadServices.Canary - h.consulClient.RemoveWorkload(workloadServices) } } diff --git a/client/allocrunner/groupservice_hook_test.go b/client/allocrunner/groupservice_hook_test.go index 5789079d8..61d9a38b4 100644 --- a/client/allocrunner/groupservice_hook_test.go +++ b/client/allocrunner/groupservice_hook_test.go @@ -54,14 +54,12 @@ func TestGroupServiceHook_NoGroupServices(t *testing.T) { require.NoError(t, h.PreTaskRestart()) ops := consulClient.GetOps() - require.Len(t, ops, 7) + require.Len(t, ops, 5) require.Equal(t, "add", ops[0].Op) // Prerun require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun (1st) - require.Equal(t, "remove", ops[3].Op) // Postrun (2nd) - require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st) - require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd) - require.Equal(t, "add", ops[6].Op) // Restart -> preRun + require.Equal(t, "remove", ops[2].Op) // Postrun + require.Equal(t, "remove", ops[3].Op) // Restart -> preKill + require.Equal(t, "add", ops[4].Op) // Restart -> preRun } // TestGroupServiceHook_ShutdownDelayUpdate asserts calling group service hooks @@ -127,14 +125,12 @@ func TestGroupServiceHook_GroupServices(t *testing.T) { require.NoError(t, h.PreTaskRestart()) ops := consulClient.GetOps() - require.Len(t, ops, 7) + require.Len(t, ops, 5) require.Equal(t, "add", ops[0].Op) // Prerun require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun (1st) - require.Equal(t, "remove", ops[3].Op) // Postrun (2nd) - require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st) - require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd) - require.Equal(t, "add", ops[6].Op) // Restart -> preRun + require.Equal(t, "remove", ops[2].Op) // Postrun + require.Equal(t, "remove", ops[3].Op) // Restart -> preKill + require.Equal(t, "add", ops[4].Op) // Restart -> preRun } // TestGroupServiceHook_Error asserts group service hooks with group @@ -175,14 +171,12 @@ func TestGroupServiceHook_NoNetwork(t *testing.T) { require.NoError(t, h.PreTaskRestart()) ops := consulClient.GetOps() - require.Len(t, ops, 7) + require.Len(t, ops, 5) require.Equal(t, "add", ops[0].Op) // Prerun require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun (1st) - require.Equal(t, "remove", ops[3].Op) // Postrun (2nd) - require.Equal(t, "remove", ops[4].Op) // Restart -> preKill (1st) - require.Equal(t, "remove", ops[5].Op) // Restart -> preKill (2nd) - require.Equal(t, "add", ops[6].Op) // Restart -> preRun + require.Equal(t, "remove", ops[2].Op) // Postrun + require.Equal(t, "remove", ops[3].Op) // Restart -> preKill + require.Equal(t, "add", ops[4].Op) // Restart -> preRun } func TestGroupServiceHook_getWorkloadServices(t *testing.T) { diff --git a/client/allocrunner/taskrunner/service_hook.go b/client/allocrunner/taskrunner/service_hook.go index 1cf2b5f65..95bf1d214 100644 --- a/client/allocrunner/taskrunner/service_hook.go +++ b/client/allocrunner/taskrunner/service_hook.go @@ -171,13 +171,10 @@ func (h *serviceHook) Exited(context.Context, *interfaces.TaskExitedRequest, *in // deregister services from Consul. func (h *serviceHook) deregister() { - workloadServices := h.getWorkloadServices() - h.consulServices.RemoveWorkload(workloadServices) - - // Canary flag may be getting flipped when the alloc is being - // destroyed, so remove both variations of the service - workloadServices.Canary = !workloadServices.Canary - h.consulServices.RemoveWorkload(workloadServices) + if len(h.services) > 0 { + workloadServices := h.getWorkloadServices() + h.consulServices.RemoveWorkload(workloadServices) + } h.initialRegistration = false } diff --git a/client/allocrunner/taskrunner/service_hook_test.go b/client/allocrunner/taskrunner/service_hook_test.go index 1b577bd54..ad88fabd3 100644 --- a/client/allocrunner/taskrunner/service_hook_test.go +++ b/client/allocrunner/taskrunner/service_hook_test.go @@ -39,16 +39,15 @@ func TestUpdate_beforePoststart(t *testing.T) { // so Update should again wait on Poststart. require.NoError(t, hook.Exited(context.Background(), &interfaces.TaskExitedRequest{}, &interfaces.TaskExitedResponse{})) + require.Len(t, c.GetOps(), 3) + require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{})) + require.Len(t, c.GetOps(), 3) + require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{})) require.Len(t, c.GetOps(), 4) require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{})) - require.Len(t, c.GetOps(), 4) - require.NoError(t, hook.Poststart(context.Background(), &interfaces.TaskPoststartRequest{}, &interfaces.TaskPoststartResponse{})) require.Len(t, c.GetOps(), 5) + require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{})) + require.Len(t, c.GetOps(), 6) require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{})) require.Len(t, c.GetOps(), 6) - require.NoError(t, hook.PreKilling(context.Background(), &interfaces.TaskPreKillRequest{}, &interfaces.TaskPreKillResponse{})) - require.Len(t, c.GetOps(), 8) - require.NoError(t, hook.Update(context.Background(), &interfaces.TaskUpdateRequest{Alloc: alloc}, &interfaces.TaskUpdateResponse{})) - require.Len(t, c.GetOps(), 8) - } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 8ef75d3fd..9e81abd31 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -957,17 +957,16 @@ func TestTaskRunner_ShutdownDelay(t *testing.T) { assert.NoError(t, tr.Kill(context.Background(), structs.NewTaskEvent("test"))) }() - // Wait for *2* deregistration calls (due to needing to remove both - // canary tag variants) + // Wait for *1* de-registration calls (all [non-]canary variants removed). + WAIT: for { ops := mockConsul.GetOps() switch n := len(ops); n { - case 1, 2: - // Waiting for both deregistration calls - case 3: + case 1: + // Waiting for single de-registration call. + case 2: require.Equalf(t, "remove", ops[1].Op, "expected deregistration but found: %#v", ops[1]) - require.Equalf(t, "remove", ops[2].Op, "expected deregistration but found: %#v", ops[2]) break WAIT default: // ?! @@ -2401,25 +2400,22 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) { consul := conf.Consul.(*consulapi.MockConsulServiceClient) consulOps := consul.GetOps() - require.Len(t, consulOps, 8) + require.Len(t, consulOps, 5) // Initial add require.Equal(t, "add", consulOps[0].Op) - // Removing canary and non-canary entries on first exit + // Removing entries on first exit require.Equal(t, "remove", consulOps[1].Op) - require.Equal(t, "remove", consulOps[2].Op) // Second add on retry - require.Equal(t, "add", consulOps[3].Op) + require.Equal(t, "add", consulOps[2].Op) - // Removing canary and non-canary entries on retry + // Removing entries on retry + require.Equal(t, "remove", consulOps[3].Op) + + // Removing entries on stop require.Equal(t, "remove", consulOps[4].Op) - require.Equal(t, "remove", consulOps[5].Op) - - // Removing canary and non-canary entries on stop - require.Equal(t, "remove", consulOps[6].Op) - require.Equal(t, "remove", consulOps[7].Op) } // testWaitForTaskToStart waits for the task to be running or fails the test