693f244cce
This PR plumbs the plugins done ctx through the base and driver plugin clients (device already had it). Further, it adds generic handling of gRPC stream errors.
447 lines
12 KiB
Go
447 lines
12 KiB
Go
package exec
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul-template/signals"
|
|
hclog "github.com/hashicorp/go-hclog"
|
|
plugin "github.com/hashicorp/go-plugin"
|
|
"github.com/hashicorp/nomad/client/driver/executor"
|
|
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
|
"github.com/hashicorp/nomad/drivers/shared/eventer"
|
|
"github.com/hashicorp/nomad/plugins/base"
|
|
"github.com/hashicorp/nomad/plugins/drivers"
|
|
"github.com/hashicorp/nomad/plugins/drivers/utils"
|
|
"github.com/hashicorp/nomad/plugins/shared"
|
|
"github.com/hashicorp/nomad/plugins/shared/hclspec"
|
|
"github.com/hashicorp/nomad/plugins/shared/loader"
|
|
)
|
|
|
|
const (
|
|
// pluginName is the name of the plugin
|
|
pluginName = "exec"
|
|
|
|
// fingerprintPeriod is the interval at which the driver will send fingerprint responses
|
|
fingerprintPeriod = 30 * time.Second
|
|
)
|
|
|
|
var (
|
|
// PluginID is the exec plugin metadata registered in the plugin
|
|
// catalog.
|
|
PluginID = loader.PluginID{
|
|
Name: pluginName,
|
|
PluginType: base.PluginTypeDriver,
|
|
}
|
|
|
|
// PluginConfig is the exec driver factory function registered in the
|
|
// plugin catalog.
|
|
PluginConfig = &loader.InternalPluginConfig{
|
|
Config: map[string]interface{}{},
|
|
Factory: func(l hclog.Logger) interface{} { return NewExecDriver(l) },
|
|
}
|
|
|
|
// pluginInfo is the response returned for the PluginInfo RPC
|
|
pluginInfo = &base.PluginInfoResponse{
|
|
Type: base.PluginTypeDriver,
|
|
PluginApiVersion: "0.0.1",
|
|
PluginVersion: "0.1.0",
|
|
Name: pluginName,
|
|
}
|
|
|
|
// configSpec is the hcl specification returned by the ConfigSchema RPC
|
|
configSpec = hclspec.NewObject(map[string]*hclspec.Spec{})
|
|
|
|
// taskConfigSpec is the hcl specification for the driver config section of
|
|
// a task within a job. It is returned in the TaskConfigSchema RPC
|
|
taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
|
|
"command": hclspec.NewAttr("command", "string", true),
|
|
"args": hclspec.NewAttr("args", "list(string)", false),
|
|
})
|
|
|
|
// capabilities is returned by the Capabilities RPC and indicates what
|
|
// optional features this driver supports
|
|
capabilities = &drivers.Capabilities{
|
|
SendSignals: true,
|
|
Exec: true,
|
|
FSIsolation: cstructs.FSIsolationChroot,
|
|
}
|
|
)
|
|
|
|
// ExecDriver fork/execs tasks using many of the underlying OS's isolation
|
|
// features where configured.
|
|
type ExecDriver struct {
|
|
// eventer is used to handle multiplexing of TaskEvents calls such that an
|
|
// event can be broadcast to all callers
|
|
eventer *eventer.Eventer
|
|
|
|
// nomadConfig is the client config from nomad
|
|
nomadConfig *base.ClientDriverConfig
|
|
|
|
// tasks is the in memory datastore mapping taskIDs to execDriverHandles
|
|
tasks *taskStore
|
|
|
|
// ctx is the context for the driver. It is passed to other subsystems to
|
|
// coordinate shutdown
|
|
ctx context.Context
|
|
|
|
// signalShutdown is called when the driver is shutting down and cancels the
|
|
// ctx passed to any subsystems
|
|
signalShutdown context.CancelFunc
|
|
|
|
// logger will log to the plugin output which is usually an 'executor.out'
|
|
// file located in the root of the TaskDir
|
|
logger hclog.Logger
|
|
}
|
|
|
|
// TaskConfig is the driver configuration of a task within a job
|
|
type TaskConfig struct {
|
|
Command string `cty:"command"`
|
|
Args []string `cty:"args"`
|
|
}
|
|
|
|
// TaskState is the state which is encoded in the handle returned in
|
|
// StartTask. This information is needed to rebuild the task state and handler
|
|
// during recovery.
|
|
type TaskState struct {
|
|
ReattachConfig *shared.ReattachConfig
|
|
TaskConfig *drivers.TaskConfig
|
|
Pid int
|
|
StartedAt time.Time
|
|
}
|
|
|
|
// NewExecDriver returns a new DrivePlugin implementation
|
|
func NewExecDriver(logger hclog.Logger) drivers.DriverPlugin {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
logger = logger.Named(pluginName)
|
|
return &ExecDriver{
|
|
eventer: eventer.NewEventer(ctx, logger),
|
|
tasks: newTaskStore(),
|
|
ctx: ctx,
|
|
signalShutdown: cancel,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
func (*ExecDriver) PluginInfo() (*base.PluginInfoResponse, error) {
|
|
return pluginInfo, nil
|
|
}
|
|
|
|
func (*ExecDriver) ConfigSchema() (*hclspec.Spec, error) {
|
|
return configSpec, nil
|
|
}
|
|
|
|
func (d *ExecDriver) SetConfig(_ []byte, cfg *base.ClientAgentConfig) error {
|
|
if cfg != nil {
|
|
d.nomadConfig = cfg.Driver
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *ExecDriver) Shutdown(ctx context.Context) error {
|
|
d.signalShutdown()
|
|
return nil
|
|
}
|
|
|
|
func (d *ExecDriver) TaskConfigSchema() (*hclspec.Spec, error) {
|
|
return taskConfigSpec, nil
|
|
}
|
|
|
|
func (d *ExecDriver) Capabilities() (*drivers.Capabilities, error) {
|
|
return capabilities, nil
|
|
}
|
|
|
|
func (d *ExecDriver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
|
|
ch := make(chan *drivers.Fingerprint)
|
|
go d.handleFingerprint(ctx, ch)
|
|
return ch, nil
|
|
|
|
}
|
|
func (d *ExecDriver) handleFingerprint(ctx context.Context, ch chan *drivers.Fingerprint) {
|
|
defer close(ch)
|
|
ticker := time.NewTimer(0)
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-d.ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
ticker.Reset(fingerprintPeriod)
|
|
ch <- d.buildFingerprint()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (d *ExecDriver) RecoverTask(handle *drivers.TaskHandle) error {
|
|
var taskState TaskState
|
|
logger := d.logger.With("task_id", handle.Config.ID)
|
|
|
|
err := handle.GetDriverState(&taskState)
|
|
if err != nil {
|
|
logger.Error("failed to decode driver state during task recovery", "error", err)
|
|
return fmt.Errorf("failed to decode state: %v", err)
|
|
}
|
|
|
|
plugRC, err := shared.ReattachConfigToGoPlugin(taskState.ReattachConfig)
|
|
if err != nil {
|
|
logger.Error("failed to build reattach config during task recovery", "error", err)
|
|
return fmt.Errorf("failed to build reattach config: %v", err)
|
|
}
|
|
|
|
pluginConfig := &plugin.ClientConfig{
|
|
Reattach: plugRC,
|
|
}
|
|
|
|
exec, pluginClient, err := utils.CreateExecutorWithConfig(pluginConfig, os.Stderr)
|
|
if err != nil {
|
|
logger.Error("failed to build executor during task recovery", "error", err)
|
|
return fmt.Errorf("failed to build executor: %v", err)
|
|
}
|
|
|
|
h := &execTaskHandle{
|
|
exec: exec,
|
|
pid: taskState.Pid,
|
|
pluginClient: pluginClient,
|
|
task: taskState.TaskConfig,
|
|
procState: drivers.TaskStateRunning,
|
|
startedAt: taskState.StartedAt,
|
|
exitCh: make(chan struct{}),
|
|
}
|
|
|
|
d.tasks.Set(taskState.TaskConfig.ID, h)
|
|
|
|
go h.run()
|
|
return nil
|
|
}
|
|
|
|
func (d *ExecDriver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *cstructs.DriverNetwork, error) {
|
|
if _, ok := d.tasks.Get(cfg.ID); ok {
|
|
return nil, nil, fmt.Errorf("task with ID '%s' already started", cfg.ID)
|
|
}
|
|
|
|
var driverConfig TaskConfig
|
|
if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
|
|
return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
|
|
}
|
|
|
|
handle := drivers.NewTaskHandle(pluginName)
|
|
handle.Config = cfg
|
|
|
|
pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
|
|
executorConfig := &dstructs.ExecutorConfig{
|
|
LogFile: pluginLogFile,
|
|
LogLevel: "debug",
|
|
FSIsolation: true,
|
|
}
|
|
|
|
// TODO: best way to pass port ranges in from client config
|
|
exec, pluginClient, err := utils.CreateExecutor(os.Stderr, hclog.Debug, d.nomadConfig, executorConfig)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
execCmd := &executor.ExecCommand{
|
|
Cmd: driverConfig.Command,
|
|
Args: driverConfig.Args,
|
|
Env: cfg.EnvList(),
|
|
User: cfg.User,
|
|
ResourceLimits: true,
|
|
Resources: &executor.Resources{
|
|
CPU: int(cfg.Resources.LinuxResources.CPUShares),
|
|
MemoryMB: int(drivers.BytesToMB(cfg.Resources.LinuxResources.MemoryLimitBytes)),
|
|
DiskMB: cfg.Resources.NomadResources.DiskMB,
|
|
},
|
|
TaskDir: cfg.TaskDir().Dir,
|
|
StdoutPath: cfg.StdoutPath,
|
|
StderrPath: cfg.StderrPath,
|
|
}
|
|
|
|
ps, err := exec.Launch(execCmd)
|
|
if err != nil {
|
|
pluginClient.Kill()
|
|
return nil, nil, err
|
|
}
|
|
|
|
h := &execTaskHandle{
|
|
exec: exec,
|
|
pid: ps.Pid,
|
|
pluginClient: pluginClient,
|
|
task: cfg,
|
|
procState: drivers.TaskStateRunning,
|
|
startedAt: time.Now().Round(time.Millisecond),
|
|
logger: d.logger,
|
|
exitCh: make(chan struct{}),
|
|
}
|
|
|
|
driverState := &TaskState{
|
|
ReattachConfig: shared.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
|
|
Pid: ps.Pid,
|
|
TaskConfig: cfg,
|
|
StartedAt: h.startedAt,
|
|
}
|
|
|
|
if err := handle.SetDriverState(&driverState); err != nil {
|
|
d.logger.Error("failed to start task, error setting driver state", "error", err)
|
|
exec.Shutdown("", 0)
|
|
pluginClient.Kill()
|
|
return nil, nil, err
|
|
}
|
|
|
|
d.tasks.Set(cfg.ID, h)
|
|
go h.run()
|
|
return handle, nil, nil
|
|
}
|
|
|
|
func (d *ExecDriver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
|
|
ch := make(chan *drivers.ExitResult)
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return nil, drivers.ErrTaskNotFound
|
|
}
|
|
go d.handleWait(ctx, handle, ch)
|
|
|
|
return ch, nil
|
|
}
|
|
|
|
func (d *ExecDriver) handleWait(ctx context.Context, handle *execTaskHandle, ch chan *drivers.ExitResult) {
|
|
defer close(ch)
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-d.ctx.Done():
|
|
return
|
|
case <-handle.exitCh:
|
|
ch <- handle.exitResult
|
|
}
|
|
}
|
|
|
|
func (d *ExecDriver) StopTask(taskID string, timeout time.Duration, signal string) error {
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return drivers.ErrTaskNotFound
|
|
}
|
|
|
|
if err := handle.exec.Shutdown(signal, timeout); err != nil {
|
|
if handle.pluginClient.Exited() {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("executor Shutdown failed: %v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *ExecDriver) DestroyTask(taskID string, force bool) error {
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return drivers.ErrTaskNotFound
|
|
}
|
|
|
|
if handle.IsRunning() && !force {
|
|
return fmt.Errorf("cannot destroy running task")
|
|
}
|
|
|
|
if !handle.pluginClient.Exited() {
|
|
if handle.IsRunning() {
|
|
if err := handle.exec.Shutdown("", 0); err != nil {
|
|
handle.logger.Error("destroying executor failed", "err", err)
|
|
}
|
|
}
|
|
|
|
handle.pluginClient.Kill()
|
|
}
|
|
|
|
d.tasks.Delete(taskID)
|
|
return nil
|
|
}
|
|
|
|
func (d *ExecDriver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return nil, drivers.ErrTaskNotFound
|
|
}
|
|
|
|
handle.stateLock.RLock()
|
|
defer handle.stateLock.RUnlock()
|
|
|
|
status := &drivers.TaskStatus{
|
|
ID: handle.task.ID,
|
|
Name: handle.task.Name,
|
|
State: handle.procState,
|
|
StartedAt: handle.startedAt,
|
|
CompletedAt: handle.completedAt,
|
|
ExitResult: handle.exitResult,
|
|
DriverAttributes: map[string]string{
|
|
"pid": strconv.Itoa(handle.pid),
|
|
},
|
|
}
|
|
|
|
return status, nil
|
|
}
|
|
|
|
func (d *ExecDriver) TaskStats(taskID string) (*cstructs.TaskResourceUsage, error) {
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return nil, drivers.ErrTaskNotFound
|
|
}
|
|
|
|
stats, err := handle.exec.Stats()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to retrieve stats from executor: %v", err)
|
|
}
|
|
|
|
return stats, nil
|
|
}
|
|
|
|
func (d *ExecDriver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
|
|
return d.eventer.TaskEvents(ctx)
|
|
}
|
|
|
|
func (d *ExecDriver) SignalTask(taskID string, signal string) error {
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return drivers.ErrTaskNotFound
|
|
}
|
|
|
|
sig := os.Interrupt
|
|
if s, ok := signals.SignalLookup[signal]; ok {
|
|
d.logger.Warn("signal to send to task unknown, using SIGINT", "signal", signal, "task_id", handle.task.ID)
|
|
sig = s
|
|
}
|
|
return handle.exec.Signal(sig)
|
|
}
|
|
|
|
func (d *ExecDriver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
|
|
if len(cmd) == 0 {
|
|
return nil, fmt.Errorf("error cmd must have atleast one value")
|
|
}
|
|
handle, ok := d.tasks.Get(taskID)
|
|
if !ok {
|
|
return nil, drivers.ErrTaskNotFound
|
|
}
|
|
|
|
args := []string{}
|
|
if len(cmd) > 1 {
|
|
args = cmd[1:]
|
|
}
|
|
|
|
out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &drivers.ExecTaskResult{
|
|
Stdout: out,
|
|
ExitResult: &drivers.ExitResult{
|
|
ExitCode: exitCode,
|
|
},
|
|
}, nil
|
|
}
|