consul/connect: fixed a bug where restarting proxy tasks failed. (#16815)

The first start of a Consul Connect proxy sidecar triggers a run
of the envoy_version hook which modifies the task config image
entry. The modification takes into account a number of factors to
correctly populate this. Importantly, once the hook has run, it
marks itself as done so the taskrunner will not execute it again.

When the client receives a non-destructive update for the
allocation which the proxy sidecar is a member of, it will update
and overwrite the task definition within the taskerunner. In doing
so it overwrite the modification performed by the hook. If the
allocation is restarted, the envoy_version hook will be skipped as
it previously marked itself as done, and therefore the sidecar
config image is incorrect and causes a driver error.

The fix removes the hook in marking itself as done to the view of
the taskrunner.
This commit is contained in:
James Rasell 2023-04-11 15:56:03 +01:00 committed by GitHub
parent ba728f8f97
commit bc01d47071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 58 deletions

3
.changelog/16815.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a bug where restarting proxy sidecar tasks failed
```

View File

@ -65,7 +65,7 @@ func (_ *envoyVersionHook) Name() string {
return envoyVersionHookName
}
func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, response *ifs.TaskPrestartResponse) error {
func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, _ *ifs.TaskPrestartResponse) error {
// First interpolation of the task image. Typically this turns the default
// ${meta.connect.sidecar_task} into envoyproxy/envoy:v${NOMAD_envoy_version}
// but could be a no-op or some other value if so configured.
@ -76,7 +76,6 @@ func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestart
// - task is a connect sidecar or gateway
// - task image needs ${NOMAD_envoy_version} resolved
if h.skip(request) {
response.Done = true
return nil
}
@ -98,7 +97,6 @@ func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestart
// Set the resulting image.
h.logger.Trace("setting task envoy image", "image", image)
request.Task.Config["image"] = image
response.Done = true
return nil
}

View File

@ -17,7 +17,7 @@ import (
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
"github.com/shoenig/test/must"
)
var (
@ -32,19 +32,19 @@ func TestEnvoyVersionHook_semver(t *testing.T) {
t.Run("with v", func(t *testing.T) {
result, err := semver("v1.2.3")
require.NoError(t, err)
require.Equal(t, "1.2.3", result)
must.NoError(t, err)
must.Eq(t, "1.2.3", result)
})
t.Run("without v", func(t *testing.T) {
result, err := semver("1.2.3")
require.NoError(t, err)
require.Equal(t, "1.2.3", result)
must.NoError(t, err)
must.Eq(t, "1.2.3", result)
})
t.Run("unexpected", func(t *testing.T) {
_, err := semver("foo")
require.EqualError(t, err, "unexpected envoy version format: Malformed version: foo")
must.ErrorContains(t, err, "unexpected envoy version format: Malformed version: foo")
})
}
@ -55,21 +55,21 @@ func TestEnvoyVersionHook_taskImage(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
// empty
})
require.Equal(t, envoy.ImageFormat, result)
must.Eq(t, envoy.ImageFormat, result)
})
t.Run("not a string", func(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
"image": 7, // not a string
})
require.Equal(t, envoy.ImageFormat, result)
must.Eq(t, envoy.ImageFormat, result)
})
t.Run("normal", func(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
"image": "custom/envoy:latest",
})
require.Equal(t, "custom/envoy:latest", result)
must.Eq(t, "custom/envoy:latest", result)
})
}
@ -80,23 +80,23 @@ func TestEnvoyVersionHook_tweakImage(t *testing.T) {
t.Run("legacy", func(t *testing.T) {
result, err := (*envoyVersionHook)(nil).tweakImage(image, nil)
require.NoError(t, err)
require.Equal(t, envoy.FallbackImage, result)
must.NoError(t, err)
must.Eq(t, envoy.FallbackImage, result)
})
t.Run("unexpected", func(t *testing.T) {
_, err := (*envoyVersionHook)(nil).tweakImage(image, map[string][]string{
"envoy": {"foo", "bar", "baz"},
})
require.EqualError(t, err, "unexpected envoy version format: Malformed version: foo")
must.ErrorContains(t, err, "unexpected envoy version format: Malformed version: foo")
})
t.Run("standard envoy", func(t *testing.T) {
result, err := (*envoyVersionHook)(nil).tweakImage(image, map[string][]string{
"envoy": {"1.15.0", "1.14.4", "1.13.4", "1.12.6"},
})
require.NoError(t, err)
require.Equal(t, "envoyproxy/envoy:v1.15.0", result)
must.NoError(t, err)
must.Eq(t, "envoyproxy/envoy:v1.15.0", result)
})
t.Run("custom image", func(t *testing.T) {
@ -104,8 +104,8 @@ func TestEnvoyVersionHook_tweakImage(t *testing.T) {
result, err := (*envoyVersionHook)(nil).tweakImage(custom, map[string][]string{
"envoy": {"1.15.0", "1.14.4", "1.13.4", "1.12.6"},
})
require.NoError(t, err)
require.Equal(t, "custom-1.15.0/envoy:1.15.0", result)
must.NoError(t, err)
must.Eq(t, "custom-1.15.0/envoy:1.15.0", result)
})
}
@ -119,7 +119,7 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
Config: map[string]interface{}{"image": envoy.SidecarConfigVar},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, envoy.ImageFormat, task.Config["image"])
must.Eq(t, envoy.ImageFormat, task.Config["image"])
})
t.Run("default gateway", func(t *testing.T) {
@ -127,7 +127,7 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
Config: map[string]interface{}{"image": envoy.GatewayConfigVar},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, envoy.ImageFormat, task.Config["image"])
must.Eq(t, envoy.ImageFormat, task.Config["image"])
})
t.Run("custom static", func(t *testing.T) {
@ -135,7 +135,7 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
Config: map[string]interface{}{"image": "custom/envoy"},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, "custom/envoy", task.Config["image"])
must.Eq(t, "custom/envoy", task.Config["image"])
})
t.Run("custom interpolated", func(t *testing.T) {
@ -147,7 +147,7 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
}, map[string]string{
"MY_ENVOY": "my/envoy",
}, nil, nil, "", ""))
require.Equal(t, "my/envoy", task.Config["image"])
must.Eq(t, "my/envoy", task.Config["image"])
})
t.Run("no image", func(t *testing.T) {
@ -155,7 +155,7 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
Config: map[string]interface{}{},
}
hook.interpolateImage(task, taskEnvDefault)
require.Empty(t, task.Config)
must.MapEmpty(t, task.Config)
})
}
@ -171,7 +171,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
Config: nil,
},
})
require.True(t, skip)
must.True(t, skip)
})
t.Run("not connect", func(t *testing.T) {
@ -181,7 +181,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
Kind: "",
},
})
require.True(t, skip)
must.True(t, skip)
})
t.Run("version not needed", func(t *testing.T) {
@ -194,7 +194,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.True(t, skip)
must.True(t, skip)
})
t.Run("version needed custom", func(t *testing.T) {
@ -207,7 +207,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.False(t, skip)
must.False(t, skip)
})
t.Run("version needed standard", func(t *testing.T) {
@ -220,7 +220,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.False(t, skip)
must.False(t, skip)
})
}
@ -252,19 +252,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_standard(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the Task.Config[image] is concrete
require.Equal(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
}
func TestTaskRunner_EnvoyVersionHook_Prestart_custom(t *testing.T) {
@ -296,19 +293,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_custom(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the Task.Config[image] is concrete
require.Equal(t, "custom-1.14.1:latest", request.Task.Config["image"])
must.Eq(t, "custom-1.14.1:latest", request.Task.Config["image"])
}
func TestTaskRunner_EnvoyVersionHook_Prestart_skip(t *testing.T) {
@ -343,19 +337,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_skip(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the Task.Config[image] does not get set
require.Empty(t, request.Task.Config["image"])
must.MapNotContainsKey(t, request.Task.Config, "image")
}
func TestTaskRunner_EnvoyVersionHook_Prestart_fallback(t *testing.T) {
@ -384,19 +375,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_fallback(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))
// Assert the Task.Config[image] is the fallback image
require.Equal(t, "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09", request.Task.Config["image"])
must.Eq(t, "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09", request.Task.Config["image"])
}
func TestTaskRunner_EnvoyVersionHook_Prestart_error(t *testing.T) {
@ -425,15 +413,63 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_error(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook, error should be recoverable
err := h.Prestart(context.Background(), request, &response)
require.EqualError(t, err, "error retrieving supported Envoy versions from Consul: some consul error")
// Assert the hook is not Done
require.False(t, response.Done)
must.ErrorContains(t, err, "error retrieving supported Envoy versions from Consul: some consul error")
}
func TestTaskRunner_EnvoyVersionHook_Prestart_restart(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
// Setup an Allocation
alloc := mock.ConnectAlloc()
alloc.Job.TaskGroups[0].Tasks[0] = mock.ConnectSidecarTask()
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "EnvoyVersionHook", alloc.ID)
defer cleanupDir()
// Set up a mock for Consul API.
mockProxiesAPI := consul.MockSupportedProxiesAPI{
Value: map[string][]string{
"envoy": {"1.15.0", "1.14.4"},
},
Error: nil,
}
// Run envoy_version hook
h := newEnvoyVersionHook(newEnvoyVersionHookConfig(alloc, mockProxiesAPI, logger))
// Create a prestart request
request := &ifs.TaskPrestartRequest{
Task: alloc.Job.TaskGroups[0].Tasks[0],
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
must.NoError(t, request.TaskDir.Build(false, nil))
// Prepare a response
var response ifs.TaskPrestartResponse
// Run the hook and ensure the tasks image has been modified.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
// Overwrite the previously modified image. This is the same behaviour that
// occurs when the server sends a non-destructive allocation update.
request.Task.Config["image"] = "${meta.connect.sidecar_image}"
// Run the Prestart hook function again, and ensure the image is updated.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
// Run the hook again, and ensure the config is still the same mimicking
// a non-user initiated restart.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
}