open-nomad/client/driver/executor/executor_linux.go

374 lines
12 KiB
Go
Raw Normal View History

2016-02-05 00:03:17 +00:00
package executor
2016-02-02 21:38:38 +00:00
import (
"fmt"
"os"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
2016-05-09 14:57:26 +00:00
"time"
"github.com/hashicorp/go-multierror"
2016-05-26 07:53:41 +00:00
"github.com/mitchellh/go-ps"
2016-02-03 19:41:49 +00:00
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
2016-04-29 18:40:37 +00:00
"github.com/opencontainers/runc/libcontainer/system"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
2016-02-03 19:41:49 +00:00
"github.com/hashicorp/nomad/nomad/structs"
)
var (
	// chrootEnv maps host OS directories to the mount points the executor
	// attempts to embed inside each task's chroot. Paths missing on the
	// host are simply skipped by the embed step.
	chrootEnv = map[string]string{
		"/bin":            "/bin",
		"/etc":            "/etc",
		"/lib":            "/lib",
		"/lib32":          "/lib32",
		"/lib64":          "/lib64",
		"/run/resolvconf": "/run/resolvconf",
		"/sbin":           "/sbin",
		"/usr":            "/usr",
	}

	// clockTicks is the number of clock ticks per second of the machine.
	clockTicks = uint64(system.GetClockTicks())

	// ExecutorCgroupMeasuredMemStats lists the memory statistics the
	// executor exposes when using cgroup isolation.
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}

	// ExecutorCgroupMeasuredCpuStats lists the CPU statistics the executor
	// exposes when using cgroup isolation.
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)
2016-02-05 00:18:10 +00:00
// configureIsolation configures chroot and creates cgroups
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) configureIsolation() error {
if e.command.FSIsolation {
2016-02-04 00:03:43 +00:00
if err := e.configureChroot(); err != nil {
return err
}
}
if e.command.ResourceLimits {
if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
2016-02-04 00:03:43 +00:00
return fmt.Errorf("error creating cgroups: %v", err)
}
2016-02-03 19:41:49 +00:00
}
2016-02-04 00:03:43 +00:00
return nil
}
2016-02-03 19:41:49 +00:00
2016-02-05 00:18:10 +00:00
// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
if !e.command.ResourceLimits {
2016-02-04 00:03:43 +00:00
return nil
}
// Entering the process in the cgroup
manager := getCgroupManager(e.resConCtx.groups, nil)
if err := manager.Apply(pid); err != nil {
e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
if er := e.removeChrootMounts(); er != nil {
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
2016-02-03 19:41:49 +00:00
}
2016-02-04 00:03:43 +00:00
return err
2016-02-03 19:41:49 +00:00
}
e.resConCtx.cgPaths = manager.GetPaths()
cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
if err := manager.Set(&cgConfig); err != nil {
e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
}
if er := e.removeChrootMounts(); er != nil {
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
}
return err
}
2016-02-04 00:03:43 +00:00
return nil
}
2016-02-04 00:03:43 +00:00
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
e.resConCtx.groups = &cgroupConfig.Cgroup{}
e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
2016-03-02 00:53:56 +00:00
cgroupName := structs.GenerateUUID()
e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
2016-02-04 00:03:43 +00:00
// TODO: verify this is needed for things like network access
e.resConCtx.groups.Resources.AllowAllDevices = true
2016-02-04 00:03:43 +00:00
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
2016-02-04 00:03:43 +00:00
// Disable swap to avoid issues on the machine
e.resConCtx.groups.Resources.MemorySwap = int64(-1)
}
2016-02-04 00:03:43 +00:00
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
2016-02-04 00:03:43 +00:00
// Set the relative CPU shares for this cgroup.
e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)
2016-02-04 00:03:43 +00:00
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
2016-02-04 00:03:43 +00:00
e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
// Stats reports the resource utilization of the cgroup. If there is no resource
// isolation we aggregate the resource utilization of all the pids launched by
// the executor.
2016-04-28 23:06:01 +00:00
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
2016-05-11 19:56:47 +00:00
if !e.command.ResourceLimits {
2016-05-26 07:53:41 +00:00
pidStats, err := e.pidStats()
if err != nil {
return nil, err
}
return e.aggregatedResourceUsage(pidStats), nil
2016-05-11 19:56:47 +00:00
}
ts := time.Now()
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
2016-04-28 23:06:01 +00:00
stats, err := manager.GetStats()
if err != nil {
return nil, err
}
2016-04-29 18:40:37 +00:00
// Memory Related Stats
2016-04-29 20:03:02 +00:00
swap := stats.MemoryStats.SwapUsage
maxUsage := stats.MemoryStats.Usage.MaxUsage
rss := stats.MemoryStats.Stats["rss"]
cache := stats.MemoryStats.Stats["cache"]
2016-04-29 18:40:37 +00:00
ms := &cstructs.MemoryStats{
2016-04-29 20:03:02 +00:00
RSS: rss,
Cache: cache,
Swap: swap.Usage,
MaxUsage: maxUsage,
KernelUsage: stats.MemoryStats.KernelUsage.Usage,
KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
2016-06-10 02:45:41 +00:00
Measured: ExecutorCgroupMeasuredMemStats,
2016-04-29 18:40:37 +00:00
}
// CPU Related Stats
totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)
2016-04-29 18:40:37 +00:00
totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
2016-05-21 07:49:17 +00:00
cs := &cstructs.CpuStats{
SystemMode: e.systemCpuStats.Percent(kernelModeTime),
UserMode: e.userCpuStats.Percent(userModeTime),
Percent: totalPercent,
2016-04-29 20:03:02 +00:00
ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime,
TotalTicks: e.systemCpuStats.TicksConsumed(totalPercent),
2016-06-10 02:45:41 +00:00
Measured: ExecutorCgroupMeasuredCpuStats,
2016-04-29 18:40:37 +00:00
}
taskResUsage := cstructs.TaskResourceUsage{
ResourceUsage: &cstructs.ResourceUsage{
MemoryStats: ms,
CpuStats: cs,
},
2016-05-28 02:08:17 +00:00
Timestamp: ts.UTC().UnixNano(),
}
2016-05-26 07:53:41 +00:00
if pidStats, err := e.pidStats(); err == nil {
taskResUsage.Pids = pidStats
}
return &taskResUsage, nil
2016-04-28 23:06:01 +00:00
}
// runAs takes a user id as a string and looks up the user, and sets the command
// to execute as that user.
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) runAs(userid string) error {
u, err := user.Lookup(userid)
if err != nil {
return fmt.Errorf("Failed to identify user %v: %v", userid, err)
}
// Convert the uid and gid
uid, err := strconv.ParseUint(u.Uid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert userid to uint32: %s", err)
}
gid, err := strconv.ParseUint(u.Gid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
}
// Set the command to run as that user and group.
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
if e.cmd.SysProcAttr.Credential == nil {
e.cmd.SysProcAttr.Credential = &syscall.Credential{}
}
e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
return nil
}
2016-02-05 00:18:10 +00:00
// configureChroot configures a chroot
2016-02-04 00:03:43 +00:00
func (e *UniversalExecutor) configureChroot() error {
allocDir := e.ctx.AllocDir
if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
2016-02-04 00:03:43 +00:00
return err
}
chroot := chrootEnv
if e.command.ChrootEnv != nil && len(e.command.ChrootEnv) > 0 {
chroot = e.command.ChrootEnv
}
if err := allocDir.Embed(e.ctx.Task.Name, chroot); err != nil {
2016-02-04 00:03:43 +00:00
return err
}
// Set the tasks AllocDir environment variable.
e.ctx.TaskEnv.
SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)).
SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)).
Build()
2016-02-04 00:03:43 +00:00
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
e.cmd.SysProcAttr.Chroot = e.taskDir
e.cmd.Dir = "/"
2016-02-04 00:03:43 +00:00
if err := allocDir.MountSpecialDirs(e.taskDir); err != nil {
return err
}
e.fsIsolationEnforced = true
2016-02-04 00:03:43 +00:00
return nil
}
// removeChrootMounts is an idempotent operation to clean the task directory's
// mounts and should be called when tearing down the task.
func (e *UniversalExecutor) removeChrootMounts() error {
	// Hold the cgroup lock to prevent a race between Wait/ForceStop.
	e.resConCtx.cgLock.Lock()
	defer e.resConCtx.cgLock.Unlock()
	return e.ctx.AllocDir.UnmountAll()
}
2016-05-26 22:25:18 +00:00
// getAllPids returns the pids of all the processes spun up by the executor. We
// use the libcontainer apis to get the pids when the user is using cgroup
// isolation and we scan the entire process table if the user is not using any
// isolation
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
2016-05-11 19:56:47 +00:00
if e.command.ResourceLimits {
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
pids, err := manager.GetAllPids()
if err != nil {
return nil, err
}
np := make(map[int]*nomadPid, len(pids))
for _, pid := range pids {
np[pid] = &nomadPid{
2016-06-10 03:45:16 +00:00
pid: pid,
cpuStatsTotal: stats.NewCpuStats(),
cpuStatsSys: stats.NewCpuStats(),
cpuStatsUser: stats.NewCpuStats(),
}
}
return np, nil
2016-05-11 19:56:47 +00:00
}
2016-05-26 07:53:41 +00:00
allProcesses, err := ps.Processes()
if err != nil {
return nil, err
}
return e.scanPids(os.Getpid(), allProcesses)
2016-05-11 19:56:47 +00:00
}
2016-02-03 19:41:49 +00:00
// destroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
mErrs := new(multierror.Error)
if groups == nil {
2016-02-03 19:41:49 +00:00
return fmt.Errorf("Can't destroy: cgroup configuration empty")
}
// Move the executor into the global cgroup so that the task specific
// cgroup can be destroyed.
nilGroup := &cgroupConfig.Cgroup{}
nilGroup.Path = "/"
nilGroup.Resources = groups.Resources
nilManager := getCgroupManager(nilGroup, nil)
err := nilManager.Apply(executorPid)
if err != nil && !strings.Contains(err.Error(), "no such process") {
return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
}
// Freeze the Cgroup so that it can not continue to fork/exec.
manager := getCgroupManager(groups, cgPaths)
err = manager.Freeze(cgroupConfig.Frozen)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
return fmt.Errorf("failed to freeze cgroup: %v", err)
}
var procs []*os.Process
pids, err := manager.GetAllPids()
if err != nil {
multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))
// Unfreeze the cgroup.
err = manager.Freeze(cgroupConfig.Thawed)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
}
return mErrs.ErrorOrNil()
}
// Kill the processes in the cgroup
for _, pid := range pids {
proc, err := os.FindProcess(pid)
if err != nil {
multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
continue
}
procs = append(procs, proc)
if e := proc.Kill(); e != nil {
multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
}
}
// Unfreeze the cgroug so we can wait.
err = manager.Freeze(cgroupConfig.Thawed)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
}
// Wait on the killed processes to ensure they are cleaned up.
for _, proc := range procs {
// Don't capture the error because we expect this to fail for
// processes we didn't fork.
proc.Wait()
}
2016-02-03 19:41:49 +00:00
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
2016-02-03 19:41:49 +00:00
}
return mErrs.ErrorOrNil()
2016-02-03 19:41:49 +00:00
}
// getCgroupManager returns the correct libcontainer cgroup manager. When
// paths is nil the manager derives the cgroup paths from the configuration.
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
}