52aaf86f52
This PR adds support for the raw_exec driver on systems with only cgroups v2. The raw exec driver is able to use cgroups to manage processes. This happens only on Linux, when exec_driver is enabled, and the no_cgroups option is not set. The driver uses the freezer controller to freeze processes of a task, issue a sigkill, then unfreeze. Previously the implementation assumed cgroups v1, and now it also supports cgroups v2. There is a bit of refactoring in this PR, but the fundamental design remains the same. Closes #12351 #12348
132 lines
4.1 KiB
Go
132 lines
4.1 KiB
Go
//go:build linux
|
|
|
|
package cgutil
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
lcc "github.com/opencontainers/runc/libcontainer/configs"
|
|
)
|
|
|
|
// UseV2 indicates whether only cgroups.v2 is enabled. If cgroups.v2 is not
|
|
// enabled or is running in hybrid mode with cgroups.v1, Nomad will make use of
|
|
// cgroups.v1
|
|
//
|
|
// This is a read-only value.
|
|
var UseV2 = cgroups.IsCgroup2UnifiedMode()
|
|
|
|
// GetCgroupParent returns the mount point under the root cgroup in which Nomad
|
|
// will create cgroups. If parent is not set, an appropriate name for the version
|
|
// of cgroups will be used.
|
|
func GetCgroupParent(parent string) string {
|
|
if UseV2 {
|
|
return getParentV2(parent)
|
|
}
|
|
return getParentV1(parent)
|
|
}
|
|
|
|
// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
|
|
func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager {
|
|
parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
|
|
if UseV2 {
|
|
return NewCpusetManagerV2(parent, logger.Named("cpuset.v2"))
|
|
}
|
|
return NewCpusetManagerV1(parent, logger.Named("cpuset.v1"))
|
|
}
|
|
|
|
// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.
|
|
func GetCPUsFromCgroup(group string) ([]uint16, error) {
|
|
group = GetCgroupParent(group)
|
|
if UseV2 {
|
|
return getCPUsFromCgroupV2(group)
|
|
}
|
|
return getCPUsFromCgroupV1(group)
|
|
}
|
|
|
|
// CgroupScope returns the name of the scope for Nomad's managed cgroups for
|
|
// the given allocID and task.
|
|
//
|
|
// e.g. "<allocID>.<task>.scope"
|
|
//
|
|
// Only useful for v2.
|
|
func CgroupScope(allocID, task string) string {
|
|
return fmt.Sprintf("%s.%s.scope", allocID, task)
|
|
}
|
|
|
|
// ConfigureBasicCgroups will initialize a cgroup and modify config to contain
|
|
// a reference to its path.
|
|
//
|
|
// v1: creates a random "freezer" cgroup which can later be used for cleanup of processes.
|
|
// v2: does nothing.
|
|
func ConfigureBasicCgroups(config *lcc.Config) error {
|
|
if UseV2 {
|
|
return nil
|
|
}
|
|
|
|
id := uuid.Generate()
|
|
// In v1 we must setup the freezer cgroup ourselves.
|
|
subsystem := "freezer"
|
|
path, err := GetCgroupPathHelperV1(subsystem, filepath.Join(DefaultCgroupV1Parent, id))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err)
|
|
}
|
|
if err = os.MkdirAll(path, 0755); err != nil {
|
|
return err
|
|
}
|
|
config.Cgroups.Paths = map[string]string{
|
|
subsystem: path,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux
|
|
// system.
|
|
//
|
|
// Note that in cgroups.v1, this returns one of many subsystems that are mounted.
|
|
// e.g. a return value of "/sys/fs/cgroup/systemd" really implies the root is
|
|
// "/sys/fs/cgroup", which is interesting on hybrid systems where the 'unified'
|
|
// subsystem is mounted as if it were a subsystem, but the actual root is different.
|
|
// (i.e. /sys/fs/cgroup/unified).
|
|
//
|
|
// As far as Nomad is concerned, UseV2 is the source of truth for which hierarchy
|
|
// to use, and that will only be a true value if cgroups.v2 is mounted on
|
|
// /sys/fs/cgroup (i.e. system is not in v1 or hybrid mode).
|
|
//
|
|
// ➜ mount -l | grep cgroup
|
|
// tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755,inode64)
|
|
// cgroup2 on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate)
|
|
// cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd)
|
|
// cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
|
|
// (etc.)
|
|
func FindCgroupMountpointDir() (string, error) {
|
|
mount, err := cgroups.GetCgroupMounts(false)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
// It's okay if the mount point is not discovered
|
|
if len(mount) == 0 {
|
|
return "", nil
|
|
}
|
|
return mount[0].Mountpoint, nil
|
|
}
|
|
|
|
// CopyCpuset copies the cpuset.cpus value from source into destination.
|
|
func CopyCpuset(source, destination string) error {
|
|
correct, err := cgroups.ReadFile(source, "cpuset.cpus")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = cgroups.WriteFile(destination, "cpuset.cpus", correct)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|