client: always run alloc cleanup hooks on final update (#15855)

* client: run alloc pre-kill hooks on last pass despite no live tasks

This PR fixes a bug where alloc pre-kill hooks were not run in the
edge case where there are no live tasks remaining, but it is also
the final update to process for the (terminal) allocation. We need
to run cleanup hooks here, otherwise they will not run until the
allocation gets garbage collected (i.e. via Destroy()), possibly
at a distant time in the future.

Fixes #15477

* client: do not run ar cleanup hooks if client is shutting down
This commit is contained in:
Seth Hoenig 2023-01-27 09:59:31 -06:00 committed by GitHub
parent de87cdc816
commit 0fac4e19b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 58 additions and 2 deletions

3
.changelog/15477.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a bug where allocation cleanup hooks would not run
```

View File

@ -125,7 +125,7 @@ type allocRunner struct {
allocDir *allocdir.AllocDir
// runnerHooks are alloc runner lifecycle hooks that should be run on state
// transistions.
// transitions.
runnerHooks []interfaces.RunnerHook
// hookState is the output of allocrunner hooks
@ -546,7 +546,9 @@ func (ar *allocRunner) handleTaskStateUpdates() {
}
}
// kill remaining live tasks
if len(liveRunners) > 0 {
// if all live runners are sidecars - kill alloc
onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners)
if killEvent == nil && onlySidecarsRemaining {
@ -586,6 +588,14 @@ func (ar *allocRunner) handleTaskStateUpdates() {
}
}
} else {
// there are no live runners left
// run AR pre-kill hooks if this alloc is done, but not if it's because
// the agent is shutting down.
if !ar.isShuttingDown() && done {
ar.preKillHooks()
}
// If there are no live runners left kill all non-poststop task
// runners to unblock them from the alloc restart loop.
for _, tr := range ar.tasks {

View File

@ -329,6 +329,7 @@ func (ar *allocRunner) destroy() error {
func (ar *allocRunner) preKillHooks() {
for _, hook := range ar.runnerHooks {
pre, ok := hook.(interfaces.RunnerPreKillHook)
if !ok {
continue
}

View File

@ -6,6 +6,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"sync/atomic"
"testing"
"time"
@ -23,6 +24,8 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
"github.com/stretchr/testify/require"
)
@ -2398,3 +2401,43 @@ func TestHasSidecarTasks(t *testing.T) {
})
}
}
type allocPreKillHook struct {
ran atomic.Bool
}
func (*allocPreKillHook) Name() string { return "test_prekill" }
func (h *allocPreKillHook) PreKill() {
h.ran.Store(true)
}
func TestAllocRunner_PreKill_RunOnDone(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Driver = "mock_driver"
task.Config = map[string]interface{}{"run_for": "2ms"}
alloc.DesiredStatus = "stop"
conf, cleanup := testAllocRunnerConfig(t, alloc.Copy())
t.Cleanup(cleanup)
ar, err := NewAllocRunner(conf)
must.NoError(t, err)
// set our custom prekill hook
hook := new(allocPreKillHook)
ar.runnerHooks = append(ar.runnerHooks, hook)
go ar.Run()
defer destroy(ar)
// wait for completion or timeout
must.Wait(t, wait.InitialSuccess(
wait.BoolFunc(hook.ran.Load),
wait.Timeout(5*time.Second),
wait.Gap(500*time.Millisecond),
))
}

View File

@ -1,5 +1,4 @@
//go:build !windows
// +build !windows
package allocrunner