open-nomad/client/lib/resources/containment_linux.go
Seth Hoenig 52aaf86f52 raw_exec: make raw exec driver work with cgroups v2
This PR adds support for the raw_exec driver on systems with only cgroups v2.

The raw exec driver is able to use cgroups to manage processes. This happens
only on Linux, when exec_driver is enabled, and the no_cgroups option is not
set. The driver uses the freezer controller to freeze processes of a task,
issue a sigkill, then unfreeze. Previously the implementation assumed cgroups
v1, and now it also supports cgroups v2.

There is a bit of refactoring in this PR, but the fundamental design remains
the same.

Closes #12351 #12348
2022-04-04 16:11:38 -05:00

108 lines
2.5 KiB
Go

//go:build linux
package resources
import (
"fmt"
"os"
"path/filepath"
"sync"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
)
type containment struct {
lock sync.RWMutex
cgroup *configs.Cgroup
logger hclog.Logger
}
func Contain(logger hclog.Logger, cgroup *configs.Cgroup) *containment {
return &containment{
cgroup: cgroup,
logger: logger.Named("containment"),
}
}
func (c *containment) Apply(pid int) error {
c.lock.Lock()
defer c.lock.Unlock()
c.logger.Trace("create containment for", "cgroup", c.cgroup, "pid", pid)
// for v2 use manager to create and enter the cgroup
if cgutil.UseV2 {
mgr, err := fs2.NewManager(c.cgroup, "", false)
if err != nil {
return fmt.Errorf("failed to create v2 cgroup manager for containment: %w", err)
}
// add the pid to the cgroup
if err = mgr.Apply(pid); err != nil {
return fmt.Errorf("failed to apply v2 cgroup containment: %w", err)
}
// in v2 it is important to set the device resource configuration
if err = mgr.Set(c.cgroup.Resources); err != nil {
return fmt.Errorf("failed to set v2 cgroup resources: %w", err)
}
return nil
}
// for v1 a random cgroup was created already; just enter it
if err := cgroups.EnterPid(c.cgroup.Paths, pid); err != nil {
return fmt.Errorf("failed to add pid to v1 cgroup: %w", err)
}
return nil
}
func (c *containment) Cleanup() error {
c.lock.Lock()
defer c.lock.Unlock()
// the current pid is of the executor, who manages the task process cleanup
executorPID := os.Getpid()
c.logger.Trace("cleanup on", "cgroup", c.cgroup, "executor_pid", executorPID)
// destroy the task processes
destroyer := cgutil.NewGroupKiller(c.logger, executorPID)
return destroyer.KillGroup(c.cgroup)
}
func (c *containment) GetPIDs() PIDs {
c.lock.Lock()
defer c.lock.Unlock()
m := make(PIDs)
if c.cgroup == nil {
return m
}
// get the cgroup path under containment
var path string
if cgutil.UseV2 {
path = filepath.Join(cgutil.CgroupRoot, c.cgroup.Path)
} else {
path = c.cgroup.Paths["freezer"]
}
// find the pids in the cgroup under containment
pids, err := cgroups.GetAllPids(path)
if err != nil {
c.logger.Debug("failed to get pids", "cgroup", c.cgroup, "error", err)
return m
}
for _, pid := range pids {
m[pid] = NewPID(pid)
}
return m
}