//go:build linux

package cgutil

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"time"

	"github.com/hashicorp/go-hclog"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
	"github.com/opencontainers/runc/libcontainer/configs"
)

// freezer is the name of the cgroup subsystem used for stopping / starting
// a group of processes
const freezer = "freezer"

// thawed and frozen are the two states we put a cgroup in when trying to remove it
var (
	thawed = &configs.Resources{Freezer: configs.Thawed}
	frozen = &configs.Resources{Freezer: configs.Frozen}
)

// GroupKiller is used for SIGKILL-ing the process tree[s] of a cgroup by leveraging
// the freezer cgroup subsystem.
type GroupKiller interface {
	KillGroup(cgroup *configs.Cgroup) error
}

// NewGroupKiller creates a GroupKiller with executor PID pid.
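//
// A minimal usage sketch (hypothetical caller code; logger and taskCgroup are
// assumptions, not part of this package):
//
//	gk := cgutil.NewGroupKiller(logger, os.Getpid())
//	if err := gk.KillGroup(taskCgroup); err != nil {
//		logger.Error("failed to kill cgroup process tree", "error", err)
//	}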
func NewGroupKiller(logger hclog.Logger, pid int) GroupKiller {
	return &killer{
		logger: logger.Named("group_killer"),
		pid:    pid,
	}
}

type killer struct {
	logger hclog.Logger
	pid    int
}

// KillGroup will SIGKILL the process tree present in cgroup, using the freezer
// subsystem to prevent further forking, etc.
func (d *killer) KillGroup(cgroup *configs.Cgroup) error {
	if UseV2 {
		return d.v2(cgroup)
	}
	return d.v1(cgroup)
}

func (d *killer) v1(cgroup *configs.Cgroup) error {
	if cgroup == nil {
		return errors.New("missing cgroup")
	}

	// the actual path to our task's freezer cgroup
	path := cgroup.Path

	d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v1", "executor_pid", d.pid)

	// move executor PID into the init freezer cgroup so we can kill the task
	// pids without killing the executor (which is the process running this code,
	// doing the killing)
	initPath, err := cgroups.GetInitCgroupPath(freezer)
	if err != nil {
		return fmt.Errorf("failed to find init cgroup: %w", err)
	}
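	// note: cgroups.EnterPid effectively re-parents the executor by writing its
	// pid into the target cgroup's cgroup.procs file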
	m := map[string]string{freezer: initPath}
	if err = cgroups.EnterPid(m, d.pid); err != nil {
		return fmt.Errorf("failed to add executor pid to init cgroup: %w", err)
	}

	// ability to freeze the cgroup
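	// (fs.FreezerGroup.Set writes FROZEN / THAWED into the cgroup's v1
	// freezer.state file)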
	freeze := func() {
		_ = new(fs.FreezerGroup).Set(path, frozen)
	}

	// ability to thaw the cgroup
	thaw := func() {
		_ = new(fs.FreezerGroup).Set(path, thawed)
	}

	// do the common kill logic
	if err = d.kill(path, freeze, thaw); err != nil {
		return err
	}

	// remove the cgroup from disk
	return cgroups.RemovePath(path)
}

func (d *killer) v2(cgroup *configs.Cgroup) error {
	if cgroup == nil {
		return errors.New("missing cgroup")
	}

	path := filepath.Join(CgroupRoot, cgroup.Path)

	existingPIDs, err := cgroups.GetPids(path)
	if err != nil {
		return fmt.Errorf("failed to determine pids in cgroup: %w", err)
	}

	d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v2", "executor_pid", d.pid, "existing_pids", existingPIDs)

	mgr, err := fs2.NewManager(cgroup, "")
	if err != nil {
		return fmt.Errorf("failed to create v2 cgroup manager: %w", err)
	}

	// move executor PID into the root init.scope so we can kill the task pids
	// without killing the executor (which is the process running this code, doing
	// the killing)
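	// (on systemd-managed hosts, init.scope is the cgroup holding pid 1; it sits
	// outside the task's cgroup subtree, so the executor survives the kill below)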
	init, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, "init.scope"))
	if err != nil {
		return fmt.Errorf("failed to create v2 init cgroup manager: %w", err)
	}
	if err = init.Apply(d.pid); err != nil {
		return fmt.Errorf("failed to move executor pid into init.scope cgroup: %w", err)
	}

	d.logger.Trace("move of executor pid into init.scope complete", "pid", d.pid)

	// ability to freeze the cgroup
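	// (on v2 the manager toggles the cgroup.freeze interface file rather than
	// the v1 freezer.state file)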
	freeze := func() {
		_ = mgr.Freeze(configs.Frozen)
	}

	// ability to thaw the cgroup
	thaw := func() {
		_ = mgr.Freeze(configs.Thawed)
	}

	// do the common kill logic
	if err = d.kill(path, freeze, thaw); err != nil {
		return err
	}

	// note: do NOT remove the cgroup from disk; leave that to the alloc-level
	// cpuset manager. Removing it here breaks task restarts, because the task
	// runner hooks block waiting for the cgroup to exist again and it is only
	// re-created by the cpuset manager during alloc pre-start.
	return nil
}

// kill is used to SIGKILL all processes in cgroup
//
// The order of operations is
// 0. before calling this method, the executor pid has been moved outside of cgroup
// 1. freeze cgroup (so processes cannot fork further)
// 2. scan the cgroup to collect all pids
// 3. issue SIGKILL to each pid found
// 4. thaw the cgroup so processes can go die
// 5. wait on each process until it is confirmed dead
func (d *killer) kill(cgroup string, freeze func(), thaw func()) error {
	// freeze the cgroup, stopping further forking
	freeze()

	d.logger.Trace("search for pids in", "cgroup", cgroup)

	// find all the pids we intend to kill
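	// (cgroups.GetPids reads the pid list from the cgroup's cgroup.procs file)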
	pids, err := cgroups.GetPids(cgroup)
	if err != nil {
		// if we fail to get pids, re-thaw before bailing so there is at least
		// a chance the processes can go die out of band
		thaw()
		return fmt.Errorf("failed to find pids: %w", err)
	}

	d.logger.Trace("send sigkill to frozen processes", "cgroup", cgroup, "pids", pids)

	var processes []*os.Process

	// kill the processes in cgroup
	for _, pid := range pids {
		p, findErr := os.FindProcess(pid)
		if findErr != nil {
			d.logger.Trace("failed to find process of pid to kill", "pid", pid, "error", findErr)
			continue
		}
		processes = append(processes, p)
		if killErr := p.Kill(); killErr != nil {
			d.logger.Trace("failed to kill process", "pid", pid, "error", killErr)
			continue
		}
	}

	// thaw the cgroup so we can wait on each process
	thaw()

	// wait on each process
	for _, p := range processes {
		// do not capture error; errors are normal here
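		// (Wait only succeeds for direct children of this process; for any
		// other pid it returns an error, which we expect and ignore)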
		pState, _ := p.Wait()
		d.logger.Trace("return from wait on process", "pid", p.Pid, "state", pState)
	}

	// cgroup operations are not atomic; the OS takes a moment to un-mark the
	// cgroup as in-use, so a tiny sleep here goes a long way toward avoiding
	// noisy (but functionally benign) errors about removing a busy cgroup
	//
	// alternatively we could do the removal in a loop and silence the interim errors, but meh
	time.Sleep(50 * time.Millisecond)

	return nil
}