128 lines
3.9 KiB
Go
128 lines
3.9 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package taskrunner
|
|
|
|
import (
|
|
"context"
|
|
|
|
hclog "github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/plugins/drivers"
|
|
)
|
|
|
|
var _ interfaces.TaskPrestartHook = (*remoteTaskHook)(nil)
|
|
var _ interfaces.TaskPreKillHook = (*remoteTaskHook)(nil)
|
|
|
|
// remoteTaskHook reattaches to remotely executing tasks.
|
|
type remoteTaskHook struct {
|
|
tr *TaskRunner
|
|
|
|
logger hclog.Logger
|
|
}
|
|
|
|
func newRemoteTaskHook(tr *TaskRunner, logger hclog.Logger) interfaces.TaskHook {
|
|
h := &remoteTaskHook{
|
|
tr: tr,
|
|
}
|
|
h.logger = logger.Named(h.Name())
|
|
return h
|
|
}
|
|
|
|
func (h *remoteTaskHook) Name() string {
|
|
return "remote_task"
|
|
}
|
|
|
|
// Prestart performs 2 remote task driver related tasks:
|
|
// 1. If there is no local handle, see if there is a handle propagated from a
|
|
// previous alloc to be restored.
|
|
// 2. If the alloc is lost make sure the task signal is set to detach instead
|
|
// of kill.
|
|
func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
|
|
if h.tr.getDriverHandle() != nil {
|
|
// Driver handle already exists so don't try to load remote
|
|
// task handle
|
|
return nil
|
|
}
|
|
|
|
h.tr.stateLock.Lock()
|
|
th := drivers.NewTaskHandleFromState(h.tr.state)
|
|
h.tr.stateLock.Unlock()
|
|
|
|
// Task handle will be nil if there was no previous allocation or if
|
|
// this is a destructive update
|
|
if th == nil {
|
|
resp.Done = true
|
|
return nil
|
|
}
|
|
|
|
// The task config is unique per invocation so recreate it here
|
|
th.Config = h.tr.buildTaskConfig()
|
|
|
|
if err := h.tr.driver.RecoverTask(th); err != nil {
|
|
// Soft error here to let a new instance get started instead of
|
|
// failing the task since retrying is unlikely to help.
|
|
h.logger.Error("error recovering task state", "error", err)
|
|
return nil
|
|
}
|
|
|
|
taskInfo, err := h.tr.driver.InspectTask(th.Config.ID)
|
|
if err != nil {
|
|
// Soft error here to let a new instance get started instead of
|
|
// failing the task since retrying is unlikely to help.
|
|
h.logger.Error("error inspecting recovered task state", "error", err)
|
|
return nil
|
|
}
|
|
|
|
h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), h.tr.clientConfig.MaxKillTimeout, taskInfo.NetworkOverride))
|
|
|
|
h.tr.stateLock.Lock()
|
|
h.tr.localState.TaskHandle = th
|
|
h.tr.localState.DriverNetwork = taskInfo.NetworkOverride
|
|
h.tr.stateLock.Unlock()
|
|
|
|
// Ensure the signal is set according to the allocation's state
|
|
h.setSignal(h.tr.Alloc())
|
|
|
|
// Emit TaskStarted manually since the normal task runner logic will
|
|
// treat this task like a restored task and skip emitting started.
|
|
h.tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
|
|
|
|
return nil
|
|
}
|
|
|
|
// PreKilling tells the remote task driver to detach a remote task instead of
|
|
// stopping it.
|
|
func (h *remoteTaskHook) PreKilling(ctx context.Context, req *interfaces.TaskPreKillRequest, resp *interfaces.TaskPreKillResponse) error {
|
|
alloc := h.tr.Alloc()
|
|
h.setSignal(alloc)
|
|
return nil
|
|
}
|
|
|
|
// setSignal to detach if the allocation is lost or draining. Safe to call
|
|
// multiple times as it only transitions to using detach -- never back to kill.
|
|
func (h *remoteTaskHook) setSignal(alloc *structs.Allocation) {
|
|
driverHandle := h.tr.getDriverHandle()
|
|
if driverHandle == nil {
|
|
// Nothing to do exit early
|
|
return
|
|
}
|
|
|
|
switch {
|
|
case alloc.ClientStatus == structs.AllocClientStatusLost:
|
|
// Continue on; lost allocs should just detach
|
|
h.logger.Debug("detaching from remote task since alloc was lost")
|
|
case alloc.DesiredTransition.ShouldMigrate():
|
|
// Continue on; migrating allocs should just detach
|
|
h.logger.Debug("detaching from remote task since alloc was drained")
|
|
default:
|
|
// Nothing to do exit early
|
|
return
|
|
}
|
|
|
|
// Set DetachSignal to indicate to the remote task driver that it
|
|
// should detach this remote task and ignore it.
|
|
driverHandle.SetKillSignal(drivers.DetachSignal)
|
|
}
|