From ca94d25cb4276335537ae1df68f913ee279ce23c Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 19 Apr 2016 13:48:02 -0700 Subject: [PATCH] Freeze the cgroup and cleanup around Shutdown --- client/driver/executor/executor.go | 30 +++++--- client/driver/executor/executor_linux.go | 87 +++++++++++++++--------- 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go index 64fe0157b..432f7a691 100644 --- a/client/driver/executor/executor.go +++ b/client/driver/executor/executor.go @@ -362,7 +362,13 @@ func (e *UniversalExecutor) Exit() error { e.lre.Close() e.lro.Close() - if e.command != nil && e.cmd.Process != nil { + // If the executor did not launch a process, return. + if e.command == nil { + return nil + } + + // Prefer killing the process via cgroups. + if e.cmd.Process != nil && !e.command.ResourceLimits { proc, err := os.FindProcess(e.cmd.Process.Pid) if err != nil { e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v", @@ -373,18 +379,19 @@ func (e *UniversalExecutor) Exit() error { } } - if e.command != nil && e.command.FSIsolation { - if err := e.removeChrootMounts(); err != nil { - merr.Errors = append(merr.Errors, err) - } - } - if e.command != nil && e.command.ResourceLimits { + if e.command.ResourceLimits { e.cgLock.Lock() if err := DestroyCgroup(e.groups, e.cgPaths, os.Getpid()); err != nil { merr.Errors = append(merr.Errors, err) } e.cgLock.Unlock() } + + if e.command.FSIsolation { + if err := e.removeChrootMounts(); err != nil { + merr.Errors = append(merr.Errors, err) + } + } return merr.ErrorOrNil() } @@ -395,12 +402,15 @@ func (e *UniversalExecutor) ShutDown() error { } proc, err := os.FindProcess(e.cmd.Process.Pid) if err != nil { - return fmt.Errorf("executor.shutdown error: %v", err) + return fmt.Errorf("executor.shutdown failed to find process: %v", err) } if runtime.GOOS == "windows" { - return proc.Kill() + if err := proc.Kill(); err != nil && err.Error() != finishedErr { + return err + } + return nil } - if err = proc.Signal(os.Interrupt); err != nil { + if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr { return fmt.Errorf("executor.shutdown error: %v", err) } return nil diff --git a/client/driver/executor/executor_linux.go b/client/driver/executor/executor_linux.go index 416ca29d5..c7f3b5b7d 100644 --- a/client/driver/executor/executor_linux.go +++ b/client/driver/executor/executor_linux.go @@ -6,6 +6,7 @@ import ( "os/user" "path/filepath" "strconv" + "strings" "syscall" "github.com/hashicorp/go-multierror" @@ -186,38 +187,12 @@ func (e *UniversalExecutor) removeChrootMounts() error { } // destroyCgroup kills all processes in the cgroup and removes the cgroup -// configuration from the host. +// configuration from the host. This function is idempotent. func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error { - merrs := new(multierror.Error) + mErrs := new(multierror.Error) if groups == nil { return fmt.Errorf("Can't destroy: cgroup configuration empty") } - manager := getCgroupManager(groups, cgPaths) - if pids, perr := manager.GetAllPids(); perr == nil { - for _, pid := range pids { - // If the pid is the pid of the executor then we don't kill it, the - // executor is going to be killed by the driver once the Wait - // returns - if pid == executorPid { - continue - } - - proc, err := os.FindProcess(pid) - if err != nil { - merrs.Errors = append(merrs.Errors, fmt.Errorf("error finding process %v: %v", pid, err)) - } else { - if e := proc.Kill(); e != nil { - merrs.Errors = append(merrs.Errors, fmt.Errorf("error killing process %v: %v", pid, e)) - } - - // Don't capture the error because we expect this to fail for - // processes we didn't fork. - proc.Wait() - } - } - } else { - merrs.Errors = append(merrs.Errors, fmt.Errorf("error getting pids: %v", perr)) - } // Move the executor into the global cgroup so that the task specific // cgroup can be destroyed. @@ -225,15 +200,63 @@ func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, execu nilGroup.Path = "/" nilGroup.Resources = groups.Resources nilManager := getCgroupManager(nilGroup, nil) - if err := nilManager.Apply(executorPid); err != nil { - multierror.Append(merrs, fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)) + err := nilManager.Apply(executorPid) + if err != nil && !strings.Contains(err.Error(), "no such process") { + return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err) + } + + // Freeze the Cgroup so that it can not continue to fork/exec. + manager := getCgroupManager(groups, cgPaths) + err = manager.Freeze(cgroupConfig.Frozen) + if err != nil && !strings.Contains(err.Error(), "no such file or directory") { + return fmt.Errorf("failed to freeze cgroup: %v", err) + } + + var procs []*os.Process + pids, err := manager.GetAllPids() + if err != nil { + multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err)) + + // Unfreeze the cgroup. + err = manager.Freeze(cgroupConfig.Thawed) + if err != nil && !strings.Contains(err.Error(), "no such file or directory") { + multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err)) + } + return mErrs.ErrorOrNil() + } + + // Kill the processes in the cgroup + for _, pid := range pids { + proc, err := os.FindProcess(pid) + if err != nil { + multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err)) + continue + } + + procs = append(procs, proc) + if e := proc.Kill(); e != nil { + multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e)) + } + } + + // Unfreeze the cgroug so we can wait. + err = manager.Freeze(cgroupConfig.Thawed) + if err != nil && !strings.Contains(err.Error(), "no such file or directory") { + multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err)) + } + + // Wait on the killed processes to ensure they are cleaned up. + for _, proc := range procs { + // Don't capture the error because we expect this to fail for + // processes we didn't fork. + proc.Wait() } // Remove the cgroup. if err := manager.Destroy(); err != nil { - multierror.Append(merrs, fmt.Errorf("failed to delete the cgroup directories: %v", err)) + multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err)) } - return merrs.ErrorOrNil() + return mErrs.ErrorOrNil() } // getCgroupManager returns the correct libcontainer cgroup manager.