open-nomad/client/driver/executor/executor.go

777 lines
22 KiB
Go
Raw Normal View History

2016-02-05 00:03:17 +00:00
package executor
2016-02-04 00:03:43 +00:00
import (
"context"
2016-02-04 00:03:43 +00:00
"fmt"
"io/ioutil"
2016-02-04 00:03:43 +00:00
"log"
"net"
2016-02-04 00:03:43 +00:00
"os"
"os/exec"
2016-03-19 19:18:10 +00:00
"path/filepath"
2016-02-04 00:03:43 +00:00
"runtime"
"strconv"
"strings"
2016-02-04 00:03:43 +00:00
"sync"
"syscall"
"time"
"github.com/armon/circbuf"
"github.com/hashicorp/go-multierror"
2016-05-11 19:56:47 +00:00
"github.com/mitchellh/go-ps"
"github.com/shirou/gopsutil/process"
2016-02-04 00:03:43 +00:00
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/env"
2016-02-19 22:01:07 +00:00
"github.com/hashicorp/nomad/client/driver/logging"
2016-05-19 20:32:03 +00:00
"github.com/hashicorp/nomad/client/stats"
shelpers "github.com/hashicorp/nomad/helper/stats"
2016-02-04 00:03:43 +00:00
"github.com/hashicorp/nomad/nomad/structs"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
2016-02-04 00:03:43 +00:00
)
const (
// pidScanInterval is the interval at which the executor scans the process
// tree for finding out the pids that the executor and it's child processes
// have forked
pidScanInterval = 5 * time.Second
)
2016-06-10 02:45:41 +00:00
var (
// The statistics the basic executor exposes
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
2016-06-10 02:45:41 +00:00
)
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
2016-10-12 18:35:29 +00:00
SetContext(ctx *ExecutorContext) error
LaunchCmd(command *ExecCommand) (*ProcessState, error)
LaunchSyslogServer() (*SyslogServerState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
UpdateTask(task *structs.Task) error
2016-03-30 05:05:02 +00:00
Version() (*ExecutorVersion, error)
2016-04-28 23:06:01 +00:00
Stats() (*cstructs.TaskResourceUsage, error)
2016-10-10 18:46:27 +00:00
Signal(s os.Signal) error
Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)
}
// ExecutorContext holds context to configure the command user
// wants to run and isolate it
2016-02-04 00:03:43 +00:00
type ExecutorContext struct {
2016-02-06 01:07:02 +00:00
// TaskEnv holds information about the environment of a Task
TaskEnv *env.TaskEnv
2016-02-06 01:07:02 +00:00
// Task is the task whose executor is being launched
Task *structs.Task
2016-02-06 01:07:02 +00:00
// TaskDir is the host path to the task's root
TaskDir string
// LogDir is the host path where logs should be written
LogDir string
// Driver is the name of the driver that invoked the executor
Driver string
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
2016-02-06 01:07:02 +00:00
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
2016-02-04 00:03:43 +00:00
}
// ExecCommand holds the user command, args, and other isolation related
// settings.
2016-02-04 00:03:43 +00:00
type ExecCommand struct {
// Cmd is the command that the user wants to run.
Cmd string
// Args is the args of the command that the user wants to run.
2016-02-04 00:03:43 +00:00
Args []string
// FSIsolation determines whether the command would be run in a chroot.
FSIsolation bool
// User is the user which the executor uses to run the command.
User string
// ResourceLimits determines whether resource limits are enforced by the
// executor.
ResourceLimits bool
2016-02-04 00:03:43 +00:00
}
// ProcessState holds information about the state of a user process.
2016-02-04 00:03:43 +00:00
type ProcessState struct {
Pid int
ExitCode int
Signal int
IsolationConfig *dstructs.IsolationConfig
Time time.Time
2016-02-04 00:03:43 +00:00
}
// nomadPid holds a pid and it's cpu percentage calculator
type nomadPid struct {
pid int
cpuStatsTotal *stats.CpuStats
cpuStatsUser *stats.CpuStats
cpuStatsSys *stats.CpuStats
}
// SyslogServerState holds the address and islation information of a launched
// syslog server
type SyslogServerState struct {
IsolationConfig *dstructs.IsolationConfig
Addr string
2016-02-04 00:03:43 +00:00
}
2016-03-30 05:05:02 +00:00
// ExecutorVersion is the version of the executor
type ExecutorVersion struct {
Version string
}
func (v *ExecutorVersion) GoString() string {
return v.Version
}
2016-02-05 00:18:10 +00:00
// UniversalExecutor is an implementation of the Executor which launches and
// supervises processes. In addition to process supervision it provides resource
// and file system isolation
2016-02-04 00:03:43 +00:00
type UniversalExecutor struct {
cmd exec.Cmd
ctx *ExecutorContext
command *ExecCommand
2016-02-04 00:03:43 +00:00
pids map[int]*nomadPid
pidLock sync.RWMutex
exitState *ProcessState
processExited chan interface{}
fsIsolationEnforced bool
lre *logging.FileRotator
lro *logging.FileRotator
rotatorLock sync.Mutex
syslogServer *logging.SyslogServer
syslogChan chan *logging.SyslogMessage
resConCtx resourceContainerContext
2016-02-04 00:03:43 +00:00
totalCpuStats *stats.CpuStats
userCpuStats *stats.CpuStats
systemCpuStats *stats.CpuStats
logger *log.Logger
2016-02-04 00:03:43 +00:00
}
2016-02-05 00:18:10 +00:00
// NewExecutor returns an Executor
2016-02-04 00:03:43 +00:00
func NewExecutor(logger *log.Logger) Executor {
if err := shelpers.Init(); err != nil {
logger.Printf("[ERR] executor: unable to initialize stats: %v", err)
}
2016-05-19 20:32:03 +00:00
exec := &UniversalExecutor{
logger: logger,
processExited: make(chan interface{}),
totalCpuStats: stats.NewCpuStats(),
userCpuStats: stats.NewCpuStats(),
systemCpuStats: stats.NewCpuStats(),
pids: make(map[int]*nomadPid),
2016-05-19 20:32:03 +00:00
}
return exec
2016-02-04 00:03:43 +00:00
}
2016-03-29 23:27:31 +00:00
// Version returns the api version of the executor
2016-03-30 05:05:02 +00:00
func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
return &ExecutorVersion{Version: "1.1.0"}, nil
2016-03-29 23:27:31 +00:00
}
2016-10-12 18:35:29 +00:00
// SetContext is used to set the executors context and should be the first call
// after launching the executor.
func (e *UniversalExecutor) SetContext(ctx *ExecutorContext) error {
e.ctx = ctx
return nil
}
// LaunchCmd launches the main process and returns its state. It also
// configures an applies isolation on certain platforms.
2016-10-12 18:35:29 +00:00
func (e *UniversalExecutor) LaunchCmd(command *ExecCommand) (*ProcessState, error) {
e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
2016-10-12 18:35:29 +00:00
// Ensure the context has been set first
if e.ctx == nil {
return nil, fmt.Errorf("SetContext must be called before launching a command")
}
e.command = command
2016-02-04 19:51:43 +00:00
// setting the user of the process
if command.User != "" {
e.logger.Printf("[DEBUG] executor: running command as %s", command.User)
if err := e.runAs(command.User); err != nil {
2016-02-04 00:09:17 +00:00
return nil, err
}
2016-02-04 00:03:43 +00:00
}
// set the task dir as the working directory for the command
e.cmd.Dir = e.ctx.TaskDir
// configuring the chroot, resource container, and start the plugin
// process in the chroot.
2016-02-04 00:03:43 +00:00
if err := e.configureIsolation(); err != nil {
return nil, err
}
// Apply ourselves into the resource container. The executor MUST be in
// the resource container before the user task is started, otherwise we
// are subject to a fork attack in which a process escapes isolation by
// immediately forking.
if err := e.applyLimits(os.Getpid()); err != nil {
2016-03-19 19:18:10 +00:00
return nil, err
2016-02-04 00:03:43 +00:00
}
// Setup the loggers
if err := e.configureLoggers(); err != nil {
return nil, err
2016-02-04 00:03:43 +00:00
}
e.cmd.Stdout = e.lro
e.cmd.Stderr = e.lre
2016-02-04 00:03:43 +00:00
2016-03-19 19:18:10 +00:00
// Look up the binary path and make it executable
2016-10-12 18:35:29 +00:00
absPath, err := e.lookupBin(e.ctx.TaskEnv.ReplaceEnv(command.Cmd))
2016-03-19 19:18:10 +00:00
if err != nil {
2016-03-16 02:22:40 +00:00
return nil, err
2016-02-04 19:51:43 +00:00
}
2016-03-19 19:18:10 +00:00
if err := e.makeExecutable(absPath); err != nil {
return nil, err
}
path := absPath
// Determine the path to run as it may have to be relative to the chroot.
if e.fsIsolationEnforced {
rel, err := filepath.Rel(e.ctx.TaskDir, path)
2016-03-19 19:18:10 +00:00
if err != nil {
return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", e.ctx.TaskDir, path, err)
2016-03-19 19:18:10 +00:00
}
path = rel
}
// Set the commands arguments
e.cmd.Path = path
2016-10-12 18:35:29 +00:00
e.cmd.Args = append([]string{e.cmd.Path}, e.ctx.TaskEnv.ParseAndReplace(command.Args)...)
e.cmd.Env = e.ctx.TaskEnv.List()
2016-03-19 19:18:10 +00:00
// Start the process
if err := e.cmd.Start(); err != nil {
return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.cmd.Args, err)
}
2016-05-11 19:56:47 +00:00
go e.collectPids()
2016-02-04 00:26:10 +00:00
go e.wait()
ic := e.resConCtx.getIsolationConfig()
return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
2016-02-04 00:03:43 +00:00
}
// Exec a command inside a container for exec and java drivers.
func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) {
ctx, cancel := context.WithDeadline(context.Background(), deadline)
defer cancel()
2017-05-04 23:21:40 +00:00
return ExecScript(ctx, e.cmd.Dir, e.ctx.TaskEnv, e.cmd.SysProcAttr, name, args)
}
2017-05-04 23:21:40 +00:00
// ExecScript executes cmd with args and returns the output, exit code, and
// error. Output is truncated to client/driver/structs.CheckBufSize
func ExecScript(ctx context.Context, dir string, env *env.TaskEnv, attrs *syscall.SysProcAttr,
2017-05-04 23:21:40 +00:00
name string, args []string) ([]byte, int, error) {
name = env.ReplaceEnv(name)
cmd := exec.CommandContext(ctx, name, env.ParseAndReplace(args)...)
// Copy runtime environment from the main command
2017-05-04 23:21:40 +00:00
cmd.SysProcAttr = attrs
cmd.Dir = dir
cmd.Env = env.List()
// Capture output
buf, _ := circbuf.NewBuffer(int64(dstructs.CheckBufSize))
cmd.Stdout = buf
cmd.Stderr = buf
if err := cmd.Run(); err != nil {
exitErr, ok := err.(*exec.ExitError)
if !ok {
// Non-exit error, return it and let the caller treat
// it as a critical failure
return nil, 0, err
}
// Some kind of error happened; default to critical
exitCode := 2
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
exitCode = status.ExitStatus()
}
// Don't return the exitError as the caller only needs the
// output and code.
return buf.Bytes(), exitCode, nil
}
return buf.Bytes(), 0, nil
}
// configureLoggers sets up the standard out/error file rotators
func (e *UniversalExecutor) configureLoggers() error {
e.rotatorLock.Lock()
defer e.rotatorLock.Unlock()
logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
if e.lro == nil {
lro, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return fmt.Errorf("error creating new stdout log file for %q: %v", e.ctx.Task.Name, err)
}
e.lro = lro
}
if e.lre == nil {
lre, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return fmt.Errorf("error creating new stderr log file for %q: %v", e.ctx.Task.Name, err)
}
e.lre = lre
}
return nil
}
2016-02-05 00:18:10 +00:00
// Wait waits until a process has exited and returns it's exitcode and errors
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
2016-02-04 00:26:10 +00:00
<-e.processExited
return e.exitState, nil
}
2016-03-18 22:04:15 +00:00
// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
2016-02-10 16:13:08 +00:00
// UpdateLogConfig updates the log configuration
2016-02-08 18:10:01 +00:00
func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
e.ctx.Task.LogConfig = logConfig
2016-02-08 18:10:01 +00:00
if e.lro == nil {
return fmt.Errorf("log rotator for stdout doesn't exist")
}
e.lro.MaxFiles = logConfig.MaxFiles
e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
if e.lre == nil {
return fmt.Errorf("log rotator for stderr doesn't exist")
}
e.lre.MaxFiles = logConfig.MaxFiles
e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
return nil
}
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
e.ctx.Task = task
2016-03-18 22:04:15 +00:00
// Updating Log Config
2016-10-28 17:57:35 +00:00
e.rotatorLock.Lock()
if e.lro != nil && e.lre != nil {
fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
e.lro.MaxFiles = task.LogConfig.MaxFiles
e.lro.FileSize = fileSize
e.lre.MaxFiles = task.LogConfig.MaxFiles
e.lre.FileSize = fileSize
}
e.rotatorLock.Unlock()
2016-03-24 02:20:08 +00:00
return nil
}
2016-02-04 00:26:10 +00:00
func (e *UniversalExecutor) wait() {
2016-02-04 18:21:33 +00:00
defer close(e.processExited)
2016-02-04 00:03:43 +00:00
err := e.cmd.Wait()
ic := e.resConCtx.getIsolationConfig()
2016-02-04 00:03:43 +00:00
if err == nil {
e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()}
2016-02-04 00:26:10 +00:00
return
2016-02-04 00:03:43 +00:00
}
2016-10-10 18:46:27 +00:00
e.lre.Close()
e.lro.Close()
2016-02-04 00:03:43 +00:00
exitCode := 1
2016-04-01 20:28:20 +00:00
var signal int
2016-02-04 00:03:43 +00:00
if exitErr, ok := err.(*exec.ExitError); ok {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
exitCode = status.ExitStatus()
2016-04-01 20:28:20 +00:00
if status.Signaled() {
// bash(1) uses the lower 7 bits of a uint8
// to indicate normal program failure (see
// <sysexits.h>). If a process terminates due
// to a signal, encode the signal number to
// indicate which signal caused the process
// to terminate. Mirror this exit code
// encoding scheme.
const exitSignalBase = 128
2016-04-01 20:28:20 +00:00
signal = int(status.Signal())
exitCode = exitSignalBase + signal
2016-04-01 20:28:20 +00:00
}
2016-02-04 00:03:43 +00:00
}
2016-04-19 22:54:21 +00:00
} else {
e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err)
2016-02-04 00:03:43 +00:00
}
2016-04-19 22:54:21 +00:00
e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()}
2016-02-04 00:03:43 +00:00
}
2016-02-09 18:00:42 +00:00
var (
// finishedErr is the error message received when trying to kill and already
// exited process.
finishedErr = "os: process already finished"
)
// ClientCleanup is the cleanup routine that a Nomad Client uses to remove the
// reminants of a child UniversalExecutor.
func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error {
return clientCleanup(ic, pid)
}
// Exit cleans up the alloc directory, destroys resource container and kills the
// user process
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) Exit() error {
var merr multierror.Error
if e.syslogServer != nil {
e.syslogServer.Shutdown()
}
2016-10-26 00:27:13 +00:00
if e.lre != nil {
e.lre.Close()
}
if e.lro != nil {
e.lro.Close()
}
// If the executor did not launch a process, return.
if e.command == nil {
return nil
}
// Prefer killing the process via the resource container.
if e.cmd.Process != nil && !e.command.ResourceLimits {
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
e.cmd.Process.Pid, err)
2016-02-09 18:00:42 +00:00
} else if err := proc.Kill(); err != nil && err.Error() != finishedErr {
merr.Errors = append(merr.Errors,
fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err))
}
2016-02-04 00:03:43 +00:00
}
if e.command.ResourceLimits {
if err := e.resConCtx.executorCleanup(); err != nil {
merr.Errors = append(merr.Errors, err)
}
2016-02-04 20:40:48 +00:00
}
return merr.ErrorOrNil()
2016-02-04 00:03:43 +00:00
}
2016-02-05 00:18:10 +00:00
// Shutdown sends an interrupt signal to the user process
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) ShutDown() error {
if e.cmd.Process == nil {
return fmt.Errorf("executor.shutdown error: no process found")
}
2016-02-04 00:03:43 +00:00
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
return fmt.Errorf("executor.shutdown failed to find process: %v", err)
2016-02-04 00:03:43 +00:00
}
if runtime.GOOS == "windows" {
if err := proc.Kill(); err != nil && err.Error() != finishedErr {
return err
}
return nil
2016-02-04 00:03:43 +00:00
}
if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr {
2016-02-04 20:40:48 +00:00
return fmt.Errorf("executor.shutdown error: %v", err)
}
return nil
2016-02-04 00:03:43 +00:00
}
// pidStats returns the resource usage stats per pid
func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) {
stats := make(map[string]*cstructs.ResourceUsage)
e.pidLock.RLock()
pids := make(map[int]*nomadPid, len(e.pids))
for k, v := range e.pids {
pids[k] = v
}
e.pidLock.RUnlock()
for pid, np := range pids {
p, err := process.NewProcess(int32(pid))
if err != nil {
2017-01-09 19:21:51 +00:00
e.logger.Printf("[TRACE] executor: unable to create new process with pid: %v", pid)
continue
}
ms := &cstructs.MemoryStats{}
if memInfo, err := p.MemoryInfo(); err == nil {
ms.RSS = memInfo.RSS
ms.Swap = memInfo.Swap
2016-06-10 02:45:41 +00:00
ms.Measured = ExecutorBasicMeasuredMemStats
}
2016-05-21 07:49:17 +00:00
cs := &cstructs.CpuStats{}
if cpuStats, err := p.Times(); err == nil {
cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second))
cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second))
2016-06-10 02:45:41 +00:00
cs.Measured = ExecutorBasicMeasuredCpuStats
// calculate cpu usage percent
cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second))
}
stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs}
}
return stats, nil
}
2016-03-19 19:18:10 +00:00
// lookupBin looks for path to the binary to run by looking for the binary in
// the following locations, in-order: task/local/, task/, based on host $PATH.
// The return path is absolute.
func (e *UniversalExecutor) lookupBin(bin string) (string, error) {
// Check in the local directory
local := filepath.Join(e.ctx.TaskDir, allocdir.TaskLocal, bin)
2016-03-19 19:18:10 +00:00
if _, err := os.Stat(local); err == nil {
return local, nil
}
// Check at the root of the task's directory
root := filepath.Join(e.ctx.TaskDir, bin)
2016-03-19 19:18:10 +00:00
if _, err := os.Stat(root); err == nil {
return root, nil
}
// Check the $PATH
if host, err := exec.LookPath(bin); err == nil {
return host, nil
}
return "", fmt.Errorf("binary %q could not be found", bin)
}
// makeExecutable makes the given file executable for root,group,others.
func (e *UniversalExecutor) makeExecutable(binPath string) error {
if runtime.GOOS == "windows" {
return nil
}
2016-03-16 02:22:40 +00:00
fi, err := os.Stat(binPath)
if err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("binary %q does not exist", binPath)
}
return fmt.Errorf("specified binary is invalid: %v", err)
}
// If it is not executable, make it so.
perm := fi.Mode().Perm()
req := os.FileMode(0555)
if perm&req != req {
if err := os.Chmod(binPath, perm|req); err != nil {
return fmt.Errorf("error making %q executable: %s", binPath, err)
}
}
return nil
}
// getFreePort returns a free port ready to be listened on between upper and
// lower bounds
func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
if runtime.GOOS == "windows" {
return e.listenerTCP(lowerBound, upperBound)
}
return e.listenerUnix()
}
// listenerTCP creates a TCP listener using an unused port between an upper and
// lower bound
func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
for i := lowerBound; i <= upperBound; i++ {
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
if err != nil {
return nil, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
continue
}
return l, nil
}
return nil, fmt.Errorf("No free port found")
}
// listenerUnix creates a Unix domain socket
func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
f, err := ioutil.TempFile("", "plugin")
if err != nil {
return nil, err
}
path := f.Name()
if err := f.Close(); err != nil {
return nil, err
}
if err := os.Remove(path); err != nil {
return nil, err
}
return net.Listen("unix", path)
}
// collectPids collects the pids of the child processes that the executor is
// running every 5 seconds
2016-05-11 19:56:47 +00:00
func (e *UniversalExecutor) collectPids() {
// Fire the timer right away when the executor starts from there on the pids
// are collected every scan interval
timer := time.NewTimer(0)
defer timer.Stop()
2016-05-11 19:56:47 +00:00
for {
select {
case <-timer.C:
pids, err := e.getAllPids()
if err != nil {
e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err)
}
e.pidLock.Lock()
// Adding pids which are not being tracked
for pid, np := range pids {
if _, ok := e.pids[pid]; !ok {
e.pids[pid] = np
}
}
// Removing pids which are no longer present
for pid := range e.pids {
if _, ok := pids[pid]; !ok {
delete(e.pids, pid)
}
}
e.pidLock.Unlock()
timer.Reset(pidScanInterval)
2016-05-11 19:56:47 +00:00
case <-e.processExited:
return
}
}
}
// scanPids scans all the pids on the machine running the current executor and
// returns the child processes of the executor.
func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) {
2016-05-11 19:56:47 +00:00
processFamily := make(map[int]struct{})
2016-05-25 21:58:32 +00:00
processFamily[parentPid] = struct{}{}
2016-09-27 23:57:26 +00:00
// A mapping of pids to their parent pids. It is used to build the process
// tree of the executing task
pidsRemaining := make(map[int]int, len(allPids))
for _, pid := range allPids {
pidsRemaining[pid.Pid()] = pid.PPid()
}
2016-05-25 21:58:32 +00:00
for {
// flag to indicate if we have found a match
foundNewPid := false
2016-09-27 23:57:26 +00:00
for pid, ppid := range pidsRemaining {
_, childPid := processFamily[ppid]
2016-05-25 21:58:32 +00:00
// checking if the pid is a child of any of the parents
if childPid {
2016-09-27 23:57:26 +00:00
processFamily[pid] = struct{}{}
delete(pidsRemaining, pid)
2016-05-25 21:58:32 +00:00
foundNewPid = true
}
}
// not scanning anymore if we couldn't find a single match
if !foundNewPid {
break
2016-05-11 19:56:47 +00:00
}
}
2016-09-27 23:57:26 +00:00
res := make(map[int]*nomadPid)
2016-05-11 19:56:47 +00:00
for pid := range processFamily {
np := nomadPid{
2016-06-10 03:45:16 +00:00
pid: pid,
cpuStatsTotal: stats.NewCpuStats(),
cpuStatsUser: stats.NewCpuStats(),
cpuStatsSys: stats.NewCpuStats(),
}
res[pid] = &np
2016-05-11 19:56:47 +00:00
}
return res, nil
}
2016-05-26 07:53:41 +00:00
// aggregatedResourceUsage aggregates the resource usage of all the pids and
// returns a TaskResourceUsage data point
func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage {
ts := time.Now().UTC().UnixNano()
2016-05-26 07:53:41 +00:00
var (
systemModeCPU, userModeCPU, percent float64
totalRSS, totalSwap uint64
)
for _, pidStat := range pidStats {
systemModeCPU += pidStat.CpuStats.SystemMode
userModeCPU += pidStat.CpuStats.UserMode
percent += pidStat.CpuStats.Percent
totalRSS += pidStat.MemoryStats.RSS
totalSwap += pidStat.MemoryStats.Swap
}
totalCPU := &cstructs.CpuStats{
SystemMode: systemModeCPU,
UserMode: userModeCPU,
Percent: percent,
Measured: ExecutorBasicMeasuredCpuStats,
TotalTicks: e.systemCpuStats.TicksConsumed(percent),
2016-05-26 07:53:41 +00:00
}
totalMemory := &cstructs.MemoryStats{
2016-06-10 02:45:41 +00:00
RSS: totalRSS,
Swap: totalSwap,
Measured: ExecutorBasicMeasuredMemStats,
2016-05-26 07:53:41 +00:00
}
resourceUsage := cstructs.ResourceUsage{
MemoryStats: totalMemory,
CpuStats: totalCPU,
}
return &cstructs.TaskResourceUsage{
ResourceUsage: &resourceUsage,
Timestamp: ts,
Pids: pidStats,
}
}
2016-10-10 18:46:27 +00:00
// Signal sends the passed signal to the task
func (e *UniversalExecutor) Signal(s os.Signal) error {
if e.cmd.Process == nil {
return fmt.Errorf("Task not yet run")
}
2017-06-12 18:11:36 +00:00
e.logger.Printf("[DEBUG] executor: sending signal %s to PID %d", s, e.cmd.Process.Pid)
2016-10-10 18:46:27 +00:00
err := e.cmd.Process.Signal(s)
if err != nil {
e.logger.Printf("[ERR] executor: sending signal %v failed: %v", s, err)
2016-10-10 18:46:27 +00:00
return err
}
return nil
}