task runner: fix goroutine leak in prestart hook (#11741)

The task runner prestart hooks take a `joincontext` so they have the
option to exit early if either of two contexts is canceled: by
killing the task or by client shutdown. Some tasks exit without being
shut down by the server, so neither of the joined contexts ever gets
canceled and we leak the `joincontext` (48 bytes) and its internal
goroutine. This primarily impacts batch jobs and any task that fails
or completes early, such as non-sidecar prestart lifecycle tasks.
Cancel the `joincontext` after the prestart call exits to fix the
leak.
This commit is contained in:
Tim Gross 2021-12-23 11:50:51 -05:00 committed by GitHub
parent 430d94b81d
commit 265e488ab4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 4 deletions

3
.changelog/11741.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a memory and goroutine leak for batch tasks and any task that exits without being shut down from the server
```

View File

@ -89,7 +89,9 @@ type TaskPrestartHook interface {
// Prestart is called before the task is started including after every // Prestart is called before the task is started including after every
// restart. Prestart is not called if the allocation is terminal. // restart. Prestart is not called if the allocation is terminal.
// //
// The context is cancelled if the task is killed or shutdown. // The context is cancelled if the task is killed or shutdown but
// should not be stored in any persistent goroutines this Prestart
// creates.
Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error
} }

View File

@ -190,6 +190,11 @@ func (tr *TaskRunner) prestart() error {
}() }()
} }
// use a join context to allow any blocking pre-start hooks
// to be canceled by either killCtx or shutdownCtx
joinedCtx, joinedCancel := joincontext.Join(tr.killCtx, tr.shutdownCtx)
defer joinedCancel()
for _, hook := range tr.runnerHooks { for _, hook := range tr.runnerHooks {
pre, ok := hook.(interfaces.TaskPrestartHook) pre, ok := hook.(interfaces.TaskPrestartHook)
if !ok { if !ok {
@ -235,9 +240,6 @@ func (tr *TaskRunner) prestart() error {
} }
// Run the prestart hook // Run the prestart hook
// use a joint context to allow any blocking pre-start hooks
// to be canceled by either killCtx or shutdownCtx
joinedCtx, _ := joincontext.Join(tr.killCtx, tr.shutdownCtx)
var resp interfaces.TaskPrestartResponse var resp interfaces.TaskPrestartResponse
if err := pre.Prestart(joinedCtx, &req, &resp); err != nil { if err := pre.Prestart(joinedCtx, &req, &resp); err != nil {
tr.emitHookError(err, name) tr.emitHookError(err, name)