docker: stop network pause container of lost alloc after node restart (#17455)

This PR fixes a bug where the Docker network pause container would not be
stopped and removed in the case where a node is restarted, the alloc is
moved to another node, and the original node comes back up. See the issue
below for the full repro conditions.

Basically, in the Postrun hook the DestroyNetwork call depended on the
NetworkIsolationSpec field not being nil, which is only the case if the
client stays alive all the way from network creation to network teardown.
If the node is rebooted we lose that state, so previously we could not find
the pause container to remove. Now we find the pause container manually, by
scanning the running containers and looking for the one labeled with the
associated allocID.
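
As a rough standalone sketch of that lookup (it mirrors the findPauseContainer helper added below; the package name, label key value, and client wiring are illustrative assumptions, not code from this diff):

```go
package sketch

import (
	"strings"

	docker "github.com/fsouza/go-dockerclient"
)

// assumed value of the docker driver's dockerLabelAllocID label key
const allocIDLabel = "com.hashicorp.nomad.alloc_id"

// findPauseContainerID scans running containers for the pause (infra)
// container belonging to the given allocation, matching both the
// nomad_init_ name prefix and the alloc ID label.
func findPauseContainerID(client *docker.Client, allocID string) (string, error) {
	containers, err := client.ListContainers(docker.ListContainersOptions{
		All: false, // pause containers should still be running
		Filters: map[string][]string{
			"label": {allocIDLabel},
		},
	})
	if err != nil {
		return "", err
	}
	for _, c := range containers {
		for _, name := range c.Names {
			// docker reports container names with a leading slash
			if strings.HasPrefix(name, "/nomad_init_") && c.Labels[allocIDLabel] == allocID {
				return c.ID, nil
			}
		}
	}
	return "", nil // not found: nothing left to clean up
}
```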

Fixes #17299
Seth Hoenig 2023-06-09 08:46:29 -05:00 committed by GitHub
parent 944f30674d
commit 557a6b4a5e
7 changed files with 142 additions and 38 deletions

.changelog/17455.txt

@@ -0,0 +1,3 @@
+```release-note:bug
+docker: Fixed a bug where network pause container would not be removed after node restart
+```


@@ -178,12 +178,14 @@ func (h *networkHook) Prerun() error {
 }
 func (h *networkHook) Postrun() error {
-	if h.spec == nil {
-		return nil
+	// we need the spec for network teardown
+	if h.spec != nil {
+		if err := h.networkConfigurator.Teardown(context.TODO(), h.alloc, h.spec); err != nil {
+			h.logger.Error("failed to cleanup network for allocation, resources may have leaked", "alloc", h.alloc.ID, "error", err)
+		}
 	}
-	if err := h.networkConfigurator.Teardown(context.TODO(), h.alloc, h.spec); err != nil {
-		h.logger.Error("failed to cleanup network for allocation, resources may have leaked", "alloc", h.alloc.ID, "error", err)
-	}
+	// issue driver destroy regardless if we have a spec (e.g. cleanup pause container)
 	return h.manager.DestroyNetwork(h.alloc.ID, h.spec)
 }


@@ -14,7 +14,8 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/plugins/drivers"
 	"github.com/hashicorp/nomad/plugins/drivers/testutils"
-	"github.com/stretchr/testify/require"
+	"github.com/shoenig/test"
+	"github.com/shoenig/test/must"
 )
 // statically assert network hook implements the expected interfaces
@@ -29,7 +30,7 @@ type mockNetworkIsolationSetter struct {
 func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIsolationSpec) {
 	m.called = true
-	require.Exactly(m.t, m.expectedSpec, spec)
+	test.Eq(m.t, m.expectedSpec, spec)
 }
 type mockNetworkStatusSetter struct {
@@ -40,20 +41,19 @@ type mockNetworkStatusSetter struct {
 func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) {
 	m.called = true
-	require.Exactly(m.t, m.expectedStatus, status)
+	test.Eq(m.t, m.expectedStatus, status)
 }
-// Test that the prerun and postrun hooks call the setter with the expected spec when
-// the network mode is not host
-func TestNetworkHook_Prerun_Postrun(t *testing.T) {
+// Test that the prerun and postrun hooks call the setter with the expected
+// NetworkIsolationSpec for group bridge network.
+func TestNetworkHook_Prerun_Postrun_group(t *testing.T) {
 	ci.Parallel(t)
 	alloc := mock.Alloc()
 	alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
-		{
-			Mode: "bridge",
-		},
+		{Mode: "bridge"},
 	}
 	spec := &drivers.NetworkIsolationSpec{
 		Mode: drivers.NetIsolationModeGroup,
 		Path: "test",
@@ -64,14 +64,14 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
 	nm := &testutils.MockDriver{
 		MockNetworkManager: testutils.MockNetworkManager{
 			CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
-				require.Equal(t, alloc.ID, allocID)
+				test.Eq(t, alloc.ID, allocID)
 				return spec, false, nil
 			},
 			DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
 				destroyCalled = true
-				require.Equal(t, alloc.ID, allocID)
-				require.Exactly(t, spec, netSpec)
+				test.Eq(t, alloc.ID, allocID)
+				test.Eq(t, spec, netSpec)
 				return nil
 			},
 		},
@@ -84,26 +84,56 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
 		t: t,
 		expectedStatus: nil,
 	}
-	require := require.New(t)
 	envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
 	logger := testlog.HCLogger(t)
 	hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
-	require.NoError(hook.Prerun())
-	require.True(setter.called)
-	require.False(destroyCalled)
-	require.NoError(hook.Postrun())
-	require.True(destroyCalled)
-	// reset and use host network mode
-	setter.called = false
-	destroyCalled = false
-	alloc.Job.TaskGroups[0].Networks[0].Mode = "host"
-	hook = newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
-	require.NoError(hook.Prerun())
-	require.False(setter.called)
-	require.False(destroyCalled)
-	require.NoError(hook.Postrun())
-	require.False(destroyCalled)
+	must.NoError(t, hook.Prerun())
+	must.True(t, setter.called)
+	must.False(t, destroyCalled)
+	must.NoError(t, hook.Postrun())
+	must.True(t, destroyCalled)
 }
+// Test that prerun and postrun hooks do not expect a NetworkIsolationSpec
+func TestNetworkHook_Prerun_Postrun_host(t *testing.T) {
+	ci.Parallel(t)
+	alloc := mock.Alloc()
+	alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{
+		{Mode: "host"},
+	}
+	destroyCalled := false
+	nm := &testutils.MockDriver{
+		MockNetworkManager: testutils.MockNetworkManager{
+			CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) {
+				test.Unreachable(t, test.Sprintf("should not call CreateNetwork for host network"))
+				return nil, false, nil
+			},
+			DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error {
+				destroyCalled = true
+				test.Nil(t, netSpec)
+				return nil
+			},
+		},
+	}
+	setter := &mockNetworkIsolationSetter{
+		t: t,
+		expectedSpec: nil,
+	}
+	statusSetter := &mockNetworkStatusSetter{
+		t: t,
+		expectedStatus: nil,
+	}
+	envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region)
+	logger := testlog.HCLogger(t)
+	hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter, envBuilder.Build())
+	must.NoError(t, hook.Prerun())
+	must.False(t, setter.called)
+	must.False(t, destroyCalled)
+	must.NoError(t, hook.Postrun())
+	must.True(t, destroyCalled)
+}


@@ -152,6 +152,9 @@ func (*defaultNetworkManager) CreateNetwork(allocID string, _ *drivers.NetworkCr
 }
 func (*defaultNetworkManager) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSpec) error {
+	if spec == nil {
+		return nil
+	}
 	return nsutil.UnmountNS(spec.Path)
 }


@@ -35,6 +35,7 @@ import (
 	"github.com/hashicorp/nomad/plugins/drivers"
 	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
 	"github.com/ryanuber/go-glob"
+	"golang.org/x/exp/slices"
 )
 var (
@@ -760,6 +761,39 @@ func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConf
 	return binds, nil
 }
+func (d *Driver) findPauseContainer(allocID string) (string, error) {
+	_, dockerClient, err := d.dockerClients()
+	if err != nil {
+		return "", err
+	}
+	containers, listErr := dockerClient.ListContainers(docker.ListContainersOptions{
+		Context: d.ctx,
+		All: false, // running only
+		Filters: map[string][]string{
+			"label": {dockerLabelAllocID},
+		},
+	})
+	if listErr != nil {
+		d.logger.Error("failed to list pause containers for recovery", "error", listErr)
+		return "", listErr
+	}
+	for _, c := range containers {
+		if !slices.ContainsFunc(c.Names, func(s string) bool {
+			return strings.HasPrefix(s, "/nomad_init_")
+		}) {
+			continue
+		}
+		if c.Labels[dockerLabelAllocID] == allocID {
+			return c.ID, nil
+		}
+	}
+	return "", nil
+}
 // recoverPauseContainers gets called when we start up the plugin. On client
 // restarts we need to rebuild the set of pause containers we are
 // tracking. Basically just scan all containers and pull the ID from anything
@@ -779,7 +813,7 @@ func (d *Driver) recoverPauseContainers(ctx context.Context) {
 		},
 	})
 	if listErr != nil && listErr != ctx.Err() {
-		d.logger.Error("failed to list pause containers", "error", listErr)
+		d.logger.Error("failed to list pause containers for recovery", "error", listErr)
 		return
 	}
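
For context, the creation side (not part of this diff) is what makes the recovery scan work: the pause container gets a deterministic nomad_init_<alloc id> name and an alloc ID label. A minimal sketch of that convention, assuming the label key and default infra image values:

```go
package sketch

import (
	docker "github.com/fsouza/go-dockerclient"
)

// assumed values; the real constants live in the docker driver
const (
	allocIDLabel      = "com.hashicorp.nomad.alloc_id"
	defaultInfraImage = "registry.k8s.io/pause-amd64:3.3"
)

// pauseContainerOptions builds create options for an allocation's pause
// container: a stable nomad_init_<alloc id> name plus an alloc ID label,
// so the container can be re-identified after a client restart.
func pauseContainerOptions(allocID string) docker.CreateContainerOptions {
	return docker.CreateContainerOptions{
		Name: "nomad_init_" + allocID,
		Config: &docker.Config{
			Image: defaultInfraImage,
			Labels: map[string]string{
				allocIDLabel: allocID,
			},
		},
	}
}
```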


@@ -93,7 +93,30 @@ func (d *Driver) CreateNetwork(allocID string, createSpec *drivers.NetworkCreate
 }
 func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSpec) error {
-	id := spec.Labels[dockerNetSpecLabelKey]
+	var (
+		id string
+		err error
+	)
+	if spec != nil {
+		// if we have the spec we can just read the container id
+		id = spec.Labels[dockerNetSpecLabelKey]
+	} else {
+		// otherwise we need to scan all the containers and find the pause container
+		// associated with this allocation - this happens when the client is
+		// restarted since we do not persist the network spec
+		id, err = d.findPauseContainer(allocID)
+	}
+	if err != nil {
+		return err
+	}
+	if id == "" {
+		d.logger.Debug("failed to find pause container to cleanup", "alloc_id", allocID)
+		return nil
+	}
 	// no longer tracking this pause container; even if we fail here we should
 	// let the background reconciliation keep trying
@@ -104,11 +127,16 @@ func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSp
 		return fmt.Errorf("failed to connect to docker daemon: %s", err)
 	}
+	timeout := uint(1) // this is the pause container, just kill it fast
+	if err := client.StopContainerWithContext(id, timeout, d.ctx); err != nil {
+		d.logger.Warn("failed to stop pause container", "id", id, "error", err)
+	}
 	if err := client.RemoveContainer(docker.RemoveContainerOptions{
 		Force: true,
 		ID: id,
 	}); err != nil {
-		return err
+		return fmt.Errorf("failed to remove pause container: %w", err)
 	}
 	if d.config.GC.Image {


@@ -490,6 +490,10 @@ func (d *driverPluginClient) CreateNetwork(allocID string, _ *NetworkCreateReque
 }
 func (d *driverPluginClient) DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error {
+	if spec == nil {
+		return nil
+	}
 	req := &proto.DestroyNetworkRequest{
 		AllocId: allocID,
 		IsolationSpec: NetworkIsolationSpecToProto(spec),