open-nomad/client/lib/cgutil/cgutil_linux.go
Seth Hoenig 5138c5b99e
client: do not disable memory swappiness if kernel does not support it (#17625)
* client: do not disable memory swappiness if kernel does not support it

This PR adds a workaround for very old Linux kernels which do not support
the memory swappiness interface file. Normally we write a "0" to the file
to explicitly disable swap. In the case the kernel does not support it,
give libcontainer a nil value so it does not write anything.

Fixes #17448

* client: detect swappiness by writing to the file

* fixup changelog

Co-authored-by: James Rasell <jrasell@users.noreply.github.com>

---------

Co-authored-by: James Rasell <jrasell@users.noreply.github.com>
2023-06-22 09:36:31 -05:00

175 lines
5.2 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
//go:build linux
package cgutil
import (
"fmt"
"os"
"path/filepath"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/opencontainers/runc/libcontainer/cgroups"
lcc "github.com/opencontainers/runc/libcontainer/configs"
)
// UseV2 indicates whether only cgroups.v2 is enabled. If cgroups.v2 is not
// enabled or is running in hybrid mode with cgroups.v1, Nomad will make use of
// cgroups.v1
//
// This is a read-only value.
var UseV2 = safelyDetectUnifiedMode()
// Currently it is possible for the runc utility function to panic
// https://github.com/opencontainers/runc/pull/3745
func safelyDetectUnifiedMode() (result bool) {
defer func() {
if r := recover(); r != nil {
result = false
}
}()
result = cgroups.IsCgroup2UnifiedMode()
return
}
// GetCgroupParent returns the mount point under the root cgroup in which Nomad
// will create cgroups. If parent is not set, an appropriate name for the version
// of cgroups will be used.
func GetCgroupParent(parent string) string {
switch {
case parent != "":
return parent
case UseV2:
return DefaultCgroupParentV2
default:
return DefaultCgroupV1Parent
}
}
// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
switch {
case UseV2:
return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2"))
default:
return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1"))
}
}
// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.
func GetCPUsFromCgroup(group string) ([]uint16, error) {
group = GetCgroupParent(group)
if UseV2 {
return getCPUsFromCgroupV2(group)
}
return getCPUsFromCgroupV1(group)
}
// CgroupScope returns the name of the scope for Nomad's managed cgroups for
// the given allocID and task.
//
// e.g. "<allocID>.<task>.scope"
//
// Only useful for v2.
func CgroupScope(allocID, task string) string {
return fmt.Sprintf("%s.%s.scope", allocID, task)
}
// ConfigureBasicCgroups will initialize a cgroup and modify config to contain
// a reference to its path.
//
// v1: creates a random "freezer" cgroup which can later be used for cleanup of processes.
// v2: does nothing.
func ConfigureBasicCgroups(config *lcc.Config) error {
if UseV2 {
return nil
}
id := uuid.Generate()
// In v1 we must setup the freezer cgroup ourselves.
subsystem := "freezer"
path, err := GetCgroupPathHelperV1(subsystem, filepath.Join(DefaultCgroupV1Parent, id))
if err != nil {
return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err)
}
if err = os.MkdirAll(path, 0755); err != nil {
return err
}
config.Cgroups.Path = path
return nil
}
// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux
// system.
//
// Note that in cgroups.v1, this returns one of many subsystems that are mounted.
// e.g. a return value of "/sys/fs/cgroup/systemd" really implies the root is
// "/sys/fs/cgroup", which is interesting on hybrid systems where the 'unified'
// subsystem is mounted as if it were a subsystem, but the actual root is different.
// (i.e. /sys/fs/cgroup/unified).
//
// As far as Nomad is concerned, UseV2 is the source of truth for which hierarchy
// to use, and that will only be a true value if cgroups.v2 is mounted on
// /sys/fs/cgroup (i.e. system is not in v1 or hybrid mode).
//
// ➜ mount -l | grep cgroup
// tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755,inode64)
// cgroup2 on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate)
// cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd)
// cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
// (etc.)
func FindCgroupMountpointDir() (string, error) {
mount, err := cgroups.GetCgroupMounts(false)
if err != nil {
return "", err
}
// It's okay if the mount point is not discovered
if len(mount) == 0 {
return "", nil
}
return mount[0].Mountpoint, nil
}
// CopyCpuset copies the cpuset.cpus value from source into destination.
func CopyCpuset(source, destination string) error {
correct, err := cgroups.ReadFile(source, "cpuset.cpus")
if err != nil {
return err
}
err = cgroups.WriteFile(destination, "cpuset.cpus", correct)
if err != nil {
return err
}
return nil
}
// MaybeDisableMemorySwappiness will disable memory swappiness, if that controller
// is available. Always the case for cgroups v2, but is not always the case on
// very old kernels with cgroups v1.
func MaybeDisableMemorySwappiness() *uint64 {
bypass := (*uint64)(nil)
zero := pointer.Of[uint64](0)
// cgroups v2 always set zero
if UseV2 {
return zero
}
// cgroups v1 detect if swappiness is supported by attempting to write to
// the nomad parent cgroup swappiness interface
e := &editor{fromRoot: "memory/nomad"}
err := e.write("memory.swappiness", "0")
if err != nil {
return bypass
}
return zero
}