client: ensure minimal cgroup controllers enabled (#15027)
* client: ensure minimal cgroup controllers enabled This PR fixes a bug where Nomad could not operate properly on operating systems that set the root cgroup.subtree_control to a set of controllers that do not include the minimal set of controllers needed by Nomad. Nomad needs these controllers enabled to operate: - cpuset - cpu - io - memory - pids Now, Nomad will ensure these controllers are enabled during Client initialization, adding them to cgroup.subtree_control as necessary. This should be particularly helpful on the RHEL/CentOS/Fedora family of systems. Ubuntu systems should be unaffected as they enable all controllers by default. Fixes: https://github.com/hashicorp/nomad/issues/14494 * docs: cleanup doc string * client: cleanup controller writes, enhance log messages
This commit is contained in:
parent
c45d9a9ea8
commit
d69556fb35
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
client: Fixed a bug where Nomad could not detect cores on recent RHEL systems
|
||||||
|
```
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
|
"github.com/hashicorp/go-set"
|
||||||
"github.com/hashicorp/nomad/helper"
|
"github.com/hashicorp/nomad/helper"
|
||||||
"github.com/hashicorp/nomad/lib/cpuset"
|
"github.com/hashicorp/nomad/lib/cpuset"
|
||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
|
@ -54,16 +55,21 @@ type cpusetManagerV2 struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
|
func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
|
||||||
|
if err := minimumRootControllers(); err != nil {
|
||||||
|
logger.Error("failed to enabled minimum set of cgroup controllers; disabling cpuset management", "error", err)
|
||||||
|
return new(NoopCpusetManager)
|
||||||
|
}
|
||||||
|
|
||||||
parentAbs := filepath.Join(CgroupRoot, parent)
|
parentAbs := filepath.Join(CgroupRoot, parent)
|
||||||
if err := os.MkdirAll(parentAbs, 0o755); err != nil {
|
if err := os.MkdirAll(parentAbs, 0o755); err != nil {
|
||||||
logger.Warn("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err)
|
logger.Error("failed to ensure nomad parent cgroup exists; disabling cpuset management", "error", err)
|
||||||
return new(NoopCpusetManager)
|
return new(NoopCpusetManager)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(reservable) == 0 {
|
if len(reservable) == 0 {
|
||||||
// read from group
|
// read from group
|
||||||
if cpus, err := GetCPUsFromCgroup(parent); err != nil {
|
if cpus, err := GetCPUsFromCgroup(parent); err != nil {
|
||||||
logger.Warn("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err)
|
logger.Error("failed to lookup cpus from parent cgroup; disabling cpuset management", "error", err)
|
||||||
return new(NoopCpusetManager)
|
return new(NoopCpusetManager)
|
||||||
} else {
|
} else {
|
||||||
reservable = cpus
|
reservable = cpus
|
||||||
|
@ -80,6 +86,33 @@ func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// minimumControllers sets the minimum set of required controllers on the
|
||||||
|
// /sys/fs/cgroup/cgroup.subtree_control file - ensuring [cpuset, cpu, io, memory, pids]
|
||||||
|
// are enabled.
|
||||||
|
func minimumRootControllers() error {
|
||||||
|
e := new(editor)
|
||||||
|
s, err := e.read("cgroup.subtree_control")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
required := set.From[string]([]string{"cpuset", "cpu", "io", "memory", "pids"})
|
||||||
|
enabled := set.From[string](strings.Fields(s))
|
||||||
|
needed := required.Difference(enabled)
|
||||||
|
|
||||||
|
if needed.Size() == 0 {
|
||||||
|
return nil // already sufficient
|
||||||
|
}
|
||||||
|
|
||||||
|
sb := new(strings.Builder)
|
||||||
|
for _, controller := range needed.List() {
|
||||||
|
sb.WriteString("+" + controller + " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
activation := strings.TrimSpace(sb.String())
|
||||||
|
return e.write("cgroup.subtree_control", activation)
|
||||||
|
}
|
||||||
|
|
||||||
// Init emits a debug log entry reporting c.initial under the "cores" key.
func (c *cpusetManagerV2) Init() {
|
func (c *cpusetManagerV2) Init() {
|
||||||
c.logger.Debug("initializing with", "cores", c.initial)
|
c.logger.Debug("initializing with", "cores", c.initial)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue