open-nomad/client/allocrunner/fail_hook.go
Daniel Bennett a7ed6f5c53
full task cleanup when alloc prerun hook fails (#17104)
to avoid leaking task resources (e.g. containers,
iptables) if allocRunner prerun fails during
restore on client restart.

now if prerun fails, TaskRunner.MarkFailedKill()
will only emit an event, mark the task as failed,
and cancel the tr's killCtx, so then ar.runTasks()
-> tr.Run() can take care of the actual cleanup.

removed from (formerly) tr.MarkFailedDead(),
now handled by tr.Run():
 * set task state as dead
 * save task runner local state
 * task stop hooks

also done in tr.Run() now that it's not skipped:
 * handleKill() to kill tasks while respecting
   their shutdown delay, and retrying as needed
   * also includes task preKill hooks
 * clearDriverHandle() to destroy the task
   and associated resources
 * task exited hooks
2023-05-08 13:17:10 -05:00

119 lines
2.6 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
// FailHook is designed to fail for testing purposes,
// so should never be included in a release.
//go:build !release
package allocrunner
import (
"errors"
"fmt"
"os"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/hcl/v2/hclsimple"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
)
var ErrFailHookError = errors.New("failed successfully")
func NewFailHook(l hclog.Logger, name string) *FailHook {
return &FailHook{
name: name,
logger: l.Named(name),
}
}
type FailHook struct {
name string
logger hclog.Logger
Fail struct {
Prerun bool `hcl:"prerun,optional"`
PreKill bool `hcl:"prekill,optional"`
Postrun bool `hcl:"postrun,optional"`
Destroy bool `hcl:"destroy,optional"`
Update bool `hcl:"update,optional"`
PreTaskRestart bool `hcl:"pretaskrestart,optional"`
Shutdown bool `hcl:"shutdown,optional"`
}
}
func (h *FailHook) Name() string {
return h.name
}
func (h *FailHook) LoadConfig(path string) *FailHook {
if _, err := os.Stat(path); os.IsNotExist(err) {
h.logger.Error("couldn't load config", "error", err)
return h
}
if err := hclsimple.DecodeFile(path, nil, &h.Fail); err != nil {
h.logger.Error("error parsing config", "path", path, "error", err)
}
return h
}
var _ interfaces.RunnerPrerunHook = &FailHook{}
func (h *FailHook) Prerun() error {
if h.Fail.Prerun {
return fmt.Errorf("prerun %w", ErrFailHookError)
}
return nil
}
var _ interfaces.RunnerPreKillHook = &FailHook{}
func (h *FailHook) PreKill() {
if h.Fail.PreKill {
h.logger.Error("prekill", "error", ErrFailHookError)
}
}
var _ interfaces.RunnerPostrunHook = &FailHook{}
func (h *FailHook) Postrun() error {
if h.Fail.Postrun {
return fmt.Errorf("postrun %w", ErrFailHookError)
}
return nil
}
var _ interfaces.RunnerDestroyHook = &FailHook{}
func (h *FailHook) Destroy() error {
if h.Fail.Destroy {
return fmt.Errorf("destroy %w", ErrFailHookError)
}
return nil
}
var _ interfaces.RunnerUpdateHook = &FailHook{}
func (h *FailHook) Update(request *interfaces.RunnerUpdateRequest) error {
if h.Fail.Update {
return fmt.Errorf("update %w", ErrFailHookError)
}
return nil
}
var _ interfaces.RunnerTaskRestartHook = &FailHook{}
func (h *FailHook) PreTaskRestart() error {
if h.Fail.PreTaskRestart {
return fmt.Errorf("destroy %w", ErrFailHookError)
}
return nil
}
var _ interfaces.ShutdownHook = &FailHook{}
func (h *FailHook) Shutdown() {
if h.Fail.Shutdown {
h.logger.Error("shutdown", "error", ErrFailHookError)
}
}