client: refactor cpuset manager initialization

This PR refactors the code path in Client startup for setting up the cpuset
cgroup manager (non-linux systems not affected).

Before, there was a logic bug where we would try to read the cpuset.cpus.effective
cgroup interface file before ensuring nomad's parent cgroup existed. Therefor that
file would not exist, and the list of useable cpus would be empty. Tasks started
thereafter would not have a value set for their cpuset.cpus.

The refactoring fixes some less than ideal coding style. Instead we now bootstrap
each cpuset manager type (v1/v2) within its own constructor. If something goes
awry during bootstrap (e.g. cgroups not enabled), the constructor returns the
noop implementation and logs a warning.

Fixes #14229
This commit is contained in:
Seth Hoenig 2022-08-23 09:03:37 -05:00 committed by Seth Hoenig
parent fd9744b9eb
commit 51384dd63f
11 changed files with 112 additions and 143 deletions

3
.changelog/14230.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a bug where cpuset initialization would not work on first agent startup
```

View File

@ -8,7 +8,6 @@ import (
"net/rpc"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
@ -395,7 +394,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
invalidAllocs: make(map[string]struct{}),
serversContactedCh: make(chan struct{}),
serversContactedOnce: sync.Once{},
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger),
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger),
getter: getter.NewGetter(cfg.Artifact),
EnterpriseClient: newEnterpriseClient(logger),
}
@ -689,25 +688,8 @@ func (c *Client) init() error {
"reserved", reserved,
)
// Ensure cgroups are created on linux platform
if runtime.GOOS == "linux" && c.cpusetManager != nil {
// use the client configuration for reservable_cores if set
cores := conf.ReservableCores
if len(cores) == 0 {
// otherwise lookup the effective cores from the parent cgroup
cores, err = cgutil.GetCPUsFromCgroup(conf.CgroupParent)
if err != nil {
c.logger.Warn("failed to lookup cpuset from cgroup parent, and not set as reservable_cores", "parent", conf.CgroupParent)
// will continue with a disabled cpuset manager
}
}
if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil {
// If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager
// will be disabled. this is common when running in dev mode under a non-root user for example.
c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr)
c.cpusetManager = new(cgutil.NoopCpusetManager)
}
}
// startup the CPUSet manager
c.cpusetManager.Init()
// setup the check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)

View File

@ -15,6 +15,7 @@ import (
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/lib/cgutil"
regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
cstate "github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/command/agent/consul"
@ -738,6 +739,7 @@ func TestClient_Init(t *testing.T) {
client := &Client{
config: config,
logger: testlog.HCLogger(t),
cpusetManager: new(cgutil.NoopCpusetManager),
}
if err := client.init(); err != nil {

View File

@ -24,19 +24,25 @@ var UseV2 = cgroups.IsCgroup2UnifiedMode()
// will create cgroups. If parent is not set, an appropriate name for the version
// of cgroups will be used.
func GetCgroupParent(parent string) string {
if UseV2 {
return getParentV2(parent)
switch {
case parent != "":
return parent
case UseV2:
return DefaultCgroupParentV2
default:
return DefaultCgroupV1Parent
}
return getParentV1(parent)
}
// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager {
func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
if UseV2 {
return NewCpusetManagerV2(parent, logger.Named("cpuset.v2"))
switch {
case UseV2:
return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2"))
default:
return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1"))
}
return NewCpusetManagerV1(parent, logger.Named("cpuset.v1"))
}
// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.

View File

@ -13,6 +13,7 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
@ -58,19 +59,21 @@ func TestUtil_CreateCPUSetManager(t *testing.T) {
t.Run("v1", func(t *testing.T) {
testutil.CgroupsCompatibleV1(t)
parent := "/" + uuid.Short()
manager := CreateCPUSetManager(parent, logger)
err := manager.Init([]uint16{0})
require.NoError(t, err)
require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
manager := CreateCPUSetManager(parent, []uint16{0}, logger)
manager.Init()
_, ok := manager.(*cpusetManagerV1)
must.True(t, ok)
must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
})
t.Run("v2", func(t *testing.T) {
testutil.CgroupsCompatibleV2(t)
parent := uuid.Short() + ".slice"
manager := CreateCPUSetManager(parent, logger)
err := manager.Init([]uint16{0})
require.NoError(t, err)
require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
manager := CreateCPUSetManager(parent, []uint16{0}, logger)
manager.Init()
_, ok := manager.(*cpusetManagerV2)
must.True(t, ok)
must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
})
}

View File

@ -17,7 +17,7 @@ const (
var UseV2 = false
// CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems.
func CreateCPUSetManager(string, hclog.Logger) CpusetManager {
func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager {
return new(NoopCpusetManager)
}

View File

@ -18,10 +18,9 @@ const (
// CpusetManager is used to setup cpuset cgroups for each task.
type CpusetManager interface {
// Init should be called with the initial set of reservable cores before any
// allocations are managed. Ensures the parent cgroup exists and proper permissions
// are available for managing cgroups.
Init([]uint16) error
// Init should be called before the client starts running allocations. This
// is where the cpuset manager should start doing background operations.
Init()
// AddAlloc adds an allocation to the manager
AddAlloc(alloc *structs.Allocation)
@ -36,8 +35,7 @@ type CpusetManager interface {
type NoopCpusetManager struct{}
func (n NoopCpusetManager) Init([]uint16) error {
return nil
func (n NoopCpusetManager) Init() {
}
func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) {

View File

@ -29,12 +29,50 @@ const (
)
// NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1
func NewCpusetManagerV1(cgroupParent string, logger hclog.Logger) CpusetManager {
func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager {
if cgroupParent == "" {
cgroupParent = DefaultCgroupV1Parent
}
cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent)
if err != nil {
logger.Warn("failed to get cgroup path; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
if err != nil {
logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
parentCpuset, err := cpuset.Parse(parentCpus)
if err != nil {
logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
// ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup
if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil {
logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil {
logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
return &cpusetManagerV1{
parentCpuset: parentCpuset,
cgroupParent: cgroupParent,
cgroupParentPath: cgroupParentPath,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: logger,
}
@ -140,48 +178,11 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo
// Init checks that the cgroup parent and expected child cgroups have been created
// If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared
// cgroup is initialized.
func (c *cpusetManagerV1) Init(_ []uint16) error {
cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", c.cgroupParent)
if err != nil {
return err
}
c.cgroupParentPath = cgroupParentPath
// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
return err
}
parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
if err != nil {
return fmt.Errorf("failed to detect parent cpuset settings: %v", err)
}
c.parentCpuset, err = cpuset.Parse(parentCpus)
if err != nil {
return fmt.Errorf("failed to parse parent cpuset.cpus setting: %v", err)
}
// ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup
if err := os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err == nil {
// cgroup created, leave cpuset.cpus empty but copy cpuset.mems from parent
if err != nil {
return err
}
} else if !os.IsExist(err) {
return err
}
if err := cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil {
return err
}
func (c *cpusetManagerV1) Init() {
c.doneCh = make(chan struct{})
c.signalCh = make(chan struct{})
c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String())
go c.reconcileLoop()
return nil
}
func (c *cpusetManagerV1) reconcileLoop() {
@ -339,13 +340,6 @@ func getCPUsFromCgroupV1(group string) ([]uint16, error) {
return stats.CPUSetStats.CPUs, nil
}
func getParentV1(parent string) string {
if parent == "" {
return DefaultCgroupV1Parent
}
return parent
}
// cpusetEnsureParentV1 makes sure that the parent directories of current
// are created and populated with the proper cpus and mems files copied
// from their respective parent. It does that recursively, starting from

View File

@ -16,7 +16,7 @@ import (
"github.com/stretchr/testify/require"
)
func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) {
func tmpCpusetManagerV1(t *testing.T) (*cpusetManagerV1, func()) {
mount, err := FindCgroupMountpointDir()
if err != nil || mount == "" {
t.Skipf("Failed to find cgroup mount: %v %v", mount, err)
@ -25,15 +25,10 @@ func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func())
parent := "/gotest-" + uuid.Short()
require.NoError(t, cpusetEnsureParentV1(parent))
manager = &cpusetManagerV1{
cgroupParent: parent,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: testlog.HCLogger(t),
}
parentPath, err := GetCgroupPathHelperV1("cpuset", parent)
require.NoError(t, err)
manager := NewCpusetManagerV1(parent, nil, testlog.HCLogger(t)).(*cpusetManagerV1)
return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) }
}
@ -42,7 +37,7 @@ func TestCpusetManager_V1_Init(t *testing.T) {
manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()
require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName))
require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus"))
@ -59,7 +54,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) {
manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()
alloc := mock.Alloc()
// reserve just one core (the 0th core, which probably exists)
@ -114,7 +109,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) {
manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()
alloc1 := mock.Alloc()
alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0])

View File

@ -52,24 +52,35 @@ type cpusetManagerV2 struct {
isolating map[identity]cpuset.CPUSet // isolating tasks using cores from the pool + reserved cores
}
func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager {
func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
parentAbs := filepath.Join(CgroupRoot, parent)
if err := os.MkdirAll(parentAbs, 0o755); err != nil {
logger.Warn("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}
if len(reservable) == 0 {
// read from group
if cpus, err := GetCPUsFromCgroup(parent); err != nil {
logger.Warn("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err)
return new(NoopCpusetManager)
} else {
reservable = cpus
}
}
return &cpusetManagerV2{
initial: cpuset.New(reservable...),
parent: parent,
parentAbs: filepath.Join(CgroupRoot, parent),
parentAbs: parentAbs,
logger: logger,
sharing: make(map[identity]nothing),
isolating: make(map[identity]cpuset.CPUSet),
}
}
func (c *cpusetManagerV2) Init(cores []uint16) error {
c.logger.Debug("initializing with", "cores", cores)
if err := c.ensureParent(); err != nil {
c.logger.Error("failed to init cpuset manager", "err", err)
return err
}
c.initial = cpuset.New(cores...)
return nil
func (c *cpusetManagerV2) Init() {
c.logger.Debug("initializing with", "cores", c.initial)
}
func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) {
@ -284,22 +295,6 @@ func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) {
}
}
// ensureParentCgroup will create parent cgroup for the manager if it does not
// exist yet. No PIDs are added to any cgroup yet.
func (c *cpusetManagerV2) ensureParent() error {
mgr, err := fs2.NewManager(nil, c.parentAbs)
if err != nil {
return err
}
if err = mgr.Apply(CreationPID); err != nil {
return err
}
c.logger.Trace("establish cgroup hierarchy", "parent", c.parent)
return nil
}
// fromRoot returns the joined filepath of group on the CgroupRoot
func fromRoot(group string) string {
return filepath.Join(CgroupRoot, group)
@ -319,12 +314,3 @@ func getCPUsFromCgroupV2(group string) ([]uint16, error) {
}
return set.ToSlice(), nil
}
// getParentV2 returns parent if set, otherwise the default name of Nomad's
// parent cgroup (i.e. nomad.slice).
func getParentV2(parent string) string {
if parent == "" {
return DefaultCgroupParentV2
}
return parent
}

View File

@ -32,8 +32,8 @@ func TestCpusetManager_V2_AddAlloc(t *testing.T) {
cleanup(t, parent)
// setup the cpuset manager
manager := NewCpusetManagerV2(parent, logger)
require.NoError(t, manager.Init(systemCores))
manager := NewCpusetManagerV2(parent, systemCores, logger)
manager.Init()
// add our first alloc, isolating 1 core
t.Run("first", func(t *testing.T) {
@ -72,8 +72,8 @@ func TestCpusetManager_V2_RemoveAlloc(t *testing.T) {
cleanup(t, parent)
// setup the cpuset manager
manager := NewCpusetManagerV2(parent, logger)
require.NoError(t, manager.Init(systemCores))
manager := NewCpusetManagerV2(parent, systemCores, logger)
manager.Init()
// alloc1 gets core 0
alloc1 := mock.Alloc()