From 557a6b4a5e7f2aa5b74fc83bc212915e36829293 Mon Sep 17 00:00:00 2001
From: Seth Hoenig
Date: Fri, 9 Jun 2023 08:46:29 -0500
Subject: [PATCH] docker: stop network pause container of lost alloc after
 node restart (#17455)

This PR fixes a bug where the docker network pause container would not be
stopped and removed in the case where a node is restarted, the alloc is
moved to another node, and the node then comes back up. See the issue
below for full repro conditions.

Basically, in the DestroyNetwork Postrun hook we would depend on the
NetworkIsolationSpec field not being nil - which is only the case if the
Client stays alive all the way from network creation to network teardown.
If the node is rebooted we lose that state and previously could not find
the pause container to remove. Now we manually find the pause container
by scanning all containers and looking for the associated allocID.

Fixes #17299
---
 .changelog/17455.txt                        |  3 +
 client/allocrunner/network_hook.go          | 12 +--
 client/allocrunner/network_hook_test.go     | 90 ++++++++++++++-------
 client/allocrunner/network_manager_linux.go |  3 +
 drivers/docker/driver.go                    | 36 ++++++++-
 drivers/docker/network.go                   | 32 +++++++-
 plugins/drivers/client.go                   |  4 +
 7 files changed, 142 insertions(+), 38 deletions(-)
 create mode 100644 .changelog/17455.txt

diff --git a/.changelog/17455.txt b/.changelog/17455.txt
new file mode 100644
index 000000000..6f5ffc3be
--- /dev/null
+++ b/.changelog/17455.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+docker: Fixed a bug where network pause container would not be removed after node restart
+```
diff --git a/client/allocrunner/network_hook.go b/client/allocrunner/network_hook.go
index d822f6508..bc85e9ce4 100644
--- a/client/allocrunner/network_hook.go
+++ b/client/allocrunner/network_hook.go
@@ -178,12 +178,14 @@ func (h *networkHook) Prerun() error {
 }

 func (h *networkHook) Postrun() error {
-	if h.spec == nil {
-		return nil
+
+	// we need the spec for network teardown
+	if h.spec != nil {
+		if err := h.networkConfigurator.Teardown(context.TODO(), h.alloc, h.spec); err != nil {
+			h.logger.Error("failed to cleanup network for allocation, resources may have leaked", "alloc", h.alloc.ID, "error", err)
+		}
 	}
-	if err := h.networkConfigurator.Teardown(context.TODO(), h.alloc, h.spec); err != nil {
-		h.logger.Error("failed to cleanup network for allocation, resources may have leaked", "alloc", h.alloc.ID, "error", err)
-	}
+	// issue driver destroy regardless if we have a spec (e.g. cleanup pause container)
 	return h.manager.DestroyNetwork(h.alloc.ID, h.spec)
 }

diff --git a/client/allocrunner/network_hook_test.go b/client/allocrunner/network_hook_test.go
index 07d9cabd7..bda7f0ed0 100644
--- a/client/allocrunner/network_hook_test.go
+++ b/client/allocrunner/network_hook_test.go
@@ -14,7 +14,8 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/plugins/drivers"
 	"github.com/hashicorp/nomad/plugins/drivers/testutils"
-	"github.com/stretchr/testify/require"
+	"github.com/shoenig/test"
+	"github.com/shoenig/test/must"
 )

 // statically assert network hook implements the expected interfaces
@@ -29,7 +30,7 @@ type mockNetworkIsolationSetter struct {

 func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIsolationSpec) {
 	m.called = true
-	require.Exactly(m.t, m.expectedSpec, spec)
+	test.Eq(m.t, m.expectedSpec, spec)
 }

 type mockNetworkStatusSetter struct {
@@ -40,20 +41,19 @@

 func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) {
 	m.called = true
-	require.Exactly(m.t, m.expectedStatus, status)
+	test.Eq(m.t, m.expectedStatus, status)
 }

-// Test that the prerun and postrun hooks call the setter with the expected spec when
-// the network mode is not host
-func TestNetworkHook_Prerun_Postrun(t *testing.T) {
+// Test that the prerun and postrun hooks call the setter with the expected
+// NetworkIsolationSpec for group bridge network.
+func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
 	ci.Parallel(t)

 	alloc := mock.Alloc()
 	alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
-		{
-			Mode: "bridge",
-		},
+		{Mode: "bridge"},
 	}
+
 	spec := &drivers.NetworkIsolationSpec{
 		Mode: drivers.NetIsolationModeGroup,
 		Path: "test",
@@ -64,14 +64,14 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
 	nm := &testutils.MockDriver{
 		MockNetworkManager: testutils.MockNetworkManager{
 			CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
-				require.Equal(t, alloc.ID, allocID)
+				test.Eq(t, alloc.ID, allocID)
 				return spec, false, nil
 			},

 			DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
 				destroyCalled = true
-				require.Equal(t, alloc.ID, allocID)
-				require.Exactly(t, spec, netSpec)
+				test.Eq(t, alloc.ID, allocID)
+				test.Eq(t, spec, netSpec)
 				return nil
 			},
 		},
@@ -84,26 +84,56 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
 		t:              t,
 		expectedStatus: nil,
 	}
-	require := require.New(t)

 	envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
-
 	logger := testlog.HCLogger(t)
 	hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
-	require.NoError(hook.Prerun())
-	require.True(setter.called)
-	require.False(destroyCalled)
-	require.NoError(hook.Postrun())
-	require.True(destroyCalled)
-
-	// reset and use host network mode
-	setter.called = false
-	destroyCalled = false
-	alloc.Job.TaskGroups[0].Networks[0].Mode = "host"
-	hook = newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
-	require.NoError(hook.Prerun())
-	require.False(setter.called)
-	require.False(destroyCalled)
-	require.NoError(hook.Postrun())
-	require.False(destroyCalled)
+	must.NoError(t, hook.Prerun())
+	must.True(t, setter.called)
+	must.False(t, destroyCalled)
+	must.NoError(t, hook.Postrun())
+	must.True(t, destroyCalled)
+}
+
+// Test that prerun and postrun hooks do not expect a NetworkIsolationSpec
+func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
+	ci.Parallel(t)
+
+	alloc := mock.Alloc()
+	alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
+		{Mode: "host"},
+	}
+
+	destroyCalled := false
+	nm := &testutils.MockDriver{
+		MockNetworkManager: testutils.MockNetworkManager{
+			CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
+				test.Unreachable(t, test.Sprintf("should not call CreateNetwork for host network"))
+				return nil, false, nil
+			},
+
+			DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
+				destroyCalled = true
+				test.Nil(t, netSpec)
+				return nil
+			},
+		},
+	}
+	setter := &mockNetworkIsolationSetter{
+		t:            t,
+		expectedSpec: nil,
+	}
+	statusSetter := &mockNetworkStatusSetter{
+		t:              t,
+		expectedStatus: nil,
+	}
+
+	envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
+	logger := testlog.HCLogger(t)
+	hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
+	must.NoError(t, hook.Prerun())
+	must.False(t, setter.called)
+	must.False(t, destroyCalled)
+	must.NoError(t, hook.Postrun())
+	must.True(t, destroyCalled)
 }
diff --git a/client/allocrunner/network_manager_linux.go b/client/allocrunner/network_manager_linux.go
index f86f39d62..0b7d4cc0a 100644
--- a/client/allocrunner/network_manager_linux.go
+++ b/client/allocrunner/network_manager_linux.go
@@ -152,6 +152,9 @@ func (*defaultNetworkManager) CreateNetwork(allocID string, _ *drivers.NetworkCr
 }

 func (*defaultNetworkManager) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSpec) error {
+	if spec == nil {
+		return nil
+	}
 	return nsutil.UnmountNS(spec.Path)
 }

diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go
index ccc3aa0cd..0c3227de1 100644
--- a/drivers/docker/driver.go
+++ b/drivers/docker/driver.go
@@ -35,6 +35,7 @@ import (
 	"github.com/hashicorp/nomad/plugins/drivers"
 	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
 	"github.com/ryanuber/go-glob"
+	"golang.org/x/exp/slices"
 )

 var (
@@ -760,6 +761,39 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
 	return binds, nil
 }

+func (d *Driver) findPauseContainer(allocID string) (string, error) {
+
+	_, dockerClient, err := d.dockerClients()
+	if err != nil {
+		return "", err
+	}
+
+	containers, listErr := dockerClient.ListContainers(docker.ListContainersOptions{
+		Context: d.ctx,
+		All:     false, // running only
+		Filters: map[string][]string{
+			"label": {dockerLabelAllocID},
+		},
+	})
+	if listErr != nil {
+		d.logger.Error("failed to list pause containers for recovery", "error", listErr)
+		return "", listErr
+	}
+
+	for _, c := range containers {
+		if !slices.ContainsFunc(c.Names, func(s string) bool {
+			return strings.HasPrefix(s, "/nomad_init_")
+		}) {
+			continue
+		}
+		if c.Labels[dockerLabelAllocID] == allocID {
+			return c.ID, nil
+		}
+	}
+
+	return "", nil
+}
+
 // recoverPauseContainers gets called when we start up the plugin. On client
 // restarts we need to rebuild the set of pause containers we are
 // tracking. Basically just scan all containers and pull the ID from anything
@@ -779,7 +813,7 @@
 		},
 	})
 	if listErr != nil && listErr != ctx.Err() {
-		d.logger.Error("failed to list pause containers", "error", listErr)
+		d.logger.Error("failed to list pause containers for recovery", "error", listErr)
 		return
 	}

diff --git a/drivers/docker/network.go b/drivers/docker/network.go
index 41b458493..e92acd3e5 100644
--- a/drivers/docker/network.go
+++ b/drivers/docker/network.go
@@ -93,7 +93,30 @@ func (d *Driver) CreateNetwork(allocID string, createSpec *drivers.NetworkCreate
 }

 func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSpec) error {
-	id := spec.Labels[dockerNetSpecLabelKey]
+
+	var (
+		id  string
+		err error
+	)
+
+	if spec != nil {
+		// if we have the spec we can just read the container id
+		id = spec.Labels[dockerNetSpecLabelKey]
+	} else {
+		// otherwise we need to scan all the containers and find the pause container
+		// associated with this allocation - this happens when the client is
+		// restarted since we do not persist the network spec
+		id, err = d.findPauseContainer(allocID)
+	}
+
+	if err != nil {
+		return err
+	}
+
+	if id == "" {
+		d.logger.Debug("failed to find pause container to cleanup", "alloc_id", allocID)
+		return nil
+	}

 	// no longer tracking this pause container; even if we fail here we should
 	// let the background reconciliation keep trying
@@ -104,11 +127,16 @@ func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSp
 		return fmt.Errorf("failed to connect to docker daemon: %s", err)
 	}

+	timeout := uint(1) // this is the pause container, just kill it fast
+	if err := client.StopContainerWithContext(id, timeout, d.ctx); err != nil {
+		d.logger.Warn("failed to stop pause container", "id", id, "error", err)
+	}
+
 	if err := client.RemoveContainer(docker.RemoveContainerOptions{
 		Force: true,
 		ID:    id,
 	}); err != nil {
-		return err
+		return fmt.Errorf("failed to remove pause container: %w", err)
 	}

 	if d.config.GC.Image {
diff --git a/plugins/drivers/client.go b/plugins/drivers/client.go
index bfa1bc427..c5447dc7c 100644
--- a/plugins/drivers/client.go
+++ b/plugins/drivers/client.go
@@ -490,6 +490,10 @@ func (d *driverPluginClient) CreateNetwork(allocID string, _ *NetworkCreateReque
 }

 func (d *driverPluginClient) DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error {
+	if spec == nil {
+		return nil
+	}
+
 	req := &proto.DestroyNetworkRequest{
 		AllocId:       allocID,
 		IsolationSpec: NetworkIsolationSpecToProto(spec),
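For readers who want to see the recovery lookup outside the driver plumbing, below is a minimal standalone sketch of the same idea the patch adds in `findPauseContainer`: list running containers filtered by the Nomad alloc-ID label, then match the `/nomad_init_` pause-container name prefix. It uses the fsouza/go-dockerclient package the driver already depends on; the label key `com.hashicorp.nomad.alloc_id` is assumed to be the value behind the driver's `dockerLabelAllocID` constant (not shown in this patch), and `findPauseContainerID` is a hypothetical helper, not part of the driver.

```go
package main

import (
	"fmt"
	"strings"

	docker "github.com/fsouza/go-dockerclient"
)

// labelAllocID is assumed to match the driver's dockerLabelAllocID constant.
const labelAllocID = "com.hashicorp.nomad.alloc_id"

// findPauseContainerID is a hypothetical standalone version of the lookup the
// patch adds to the driver: list running containers carrying the alloc-ID
// label, then pick the one whose name has the pause-container prefix.
func findPauseContainerID(client *docker.Client, allocID string) (string, error) {
	containers, err := client.ListContainers(docker.ListContainersOptions{
		All: false, // running containers only, as in the driver
		Filters: map[string][]string{
			"label": {labelAllocID},
		},
	})
	if err != nil {
		return "", fmt.Errorf("failed to list containers: %w", err)
	}

	for _, c := range containers {
		// pause containers are named "/nomad_init_<alloc id>"
		isPause := false
		for _, name := range c.Names {
			if strings.HasPrefix(name, "/nomad_init_") {
				isPause = true
				break
			}
		}
		if isPause && c.Labels[labelAllocID] == allocID {
			return c.ID, nil
		}
	}

	// no pause container found; nothing to clean up
	return "", nil
}

func main() {
	client, err := docker.NewClientFromEnv()
	if err != nil {
		panic(err)
	}

	// placeholder alloc ID, for illustration only
	id, err := findPauseContainerID(client, "00000000-0000-0000-0000-000000000000")
	if err != nil {
		panic(err)
	}
	fmt.Println("pause container:", id)
}
```

Filtering by label on the daemon side keeps the scan cheap even on hosts running many containers, and the name-prefix check then distinguishes the pause container from task containers that carry the same alloc-ID label, which mirrors the two-step match in the patch above.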