Fix DevicesSets being removed when cpusets are reloaded with cgroup v2 (#17535)
* Fix DevicesSets being removed when cpusets are reloaded with cgroup v2

  This meant that whenever any allocation was created or removed, all active DevicesSets were removed from all cgroups of all tasks. This was most noticeable with the "exec" and "raw_exec" drivers, as it meant their tasks no longer had access to /dev files.

* e2e: add test for verifying cgroups do not interfere with access to devices

---------

Co-authored-by: Seth Hoenig <shoenig@duck.com>
This commit is contained in:
parent
2856967dda
commit
4767d44b94
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
cgroups: Fixed a bug where all DevicesSets were removed when an allocation was created or removed
|
||||
```
|
|
@ -330,7 +330,8 @@ func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) {
|
|||
|
||||
// set the cpuset value for the cgroup
|
||||
if err = m.Set(&configs.Resources{
|
||||
CpusetCpus: set.String(),
|
||||
CpusetCpus: set.String(),
|
||||
SkipDevices: true,
|
||||
}); err != nil {
|
||||
c.logger.Error("failed to set cgroup", "path", path, "error", err)
|
||||
return
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package isolation
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/e2e/e2eutil"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/shoenig/test/must"
|
||||
)
|
||||
|
||||
func TestCgroupDevices(t *testing.T) {
|
||||
nomad := e2eutil.NomadClient(t)
|
||||
|
||||
e2eutil.WaitForLeader(t, nomad)
|
||||
e2eutil.WaitForNodesReady(t, nomad, 1)
|
||||
|
||||
t.Run("testDevicesUsable", testDevicesUsable)
|
||||
}
|
||||
|
||||
func testDevicesUsable(t *testing.T) {
|
||||
nomad := e2eutil.NomadClient(t)
|
||||
|
||||
jobID := "cgroup-devices-" + uuid.Short()
|
||||
jobIDs := []string{jobID}
|
||||
t.Cleanup(e2eutil.CleanupJobsAndGC(t, &jobIDs))
|
||||
|
||||
// start job
|
||||
allocs := e2eutil.RegisterAndWaitForAllocs(t, nomad, "./input/cgroup_devices.hcl", jobID, "")
|
||||
must.Len(t, 2, allocs)
|
||||
|
||||
// pick one to stop and one to verify
|
||||
allocA := allocs[0].ID
|
||||
allocB := allocs[1].ID
|
||||
|
||||
// verify devices are working
|
||||
checkDev(t, allocA)
|
||||
checkDev(t, allocB)
|
||||
|
||||
// stop the chosen alloc
|
||||
_, err := e2eutil.Command("nomad", "alloc", "stop", "-detach", allocA)
|
||||
must.NoError(t, err)
|
||||
e2eutil.WaitForAllocStopped(t, nomad, allocA)
|
||||
|
||||
// verify device of remaining alloc
|
||||
checkDev(t, allocB)
|
||||
}
|
||||
|
||||
func checkDev(t *testing.T, allocID string) {
|
||||
_, err := e2eutil.Command("nomad", "alloc", "exec", allocID, "dd", "if=/dev/zero", "of=/dev/null", "count=1")
|
||||
must.NoError(t, err)
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
# Service job with two groups, each running one long-lived raw_exec task.
# The accompanying e2e test expects this job to produce two allocations.
job "cgroup_devices" {
  type = "service"

  # Only place on Linux nodes.
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "group1" {

    # Sleeps forever so the allocation stays running.
    task "task1" {
      driver = "raw_exec"

      config {
        command = "/bin/sleep"
        args    = ["infinity"]
      }

      resources {
        cpu    = 50
        memory = 50
      }
    }
  }

  group "group2" {

    # Same task definition as task1, placed in its own group.
    task "task2" {
      driver = "raw_exec"

      config {
        command = "/bin/sleep"
        args    = ["infinity"]
      }

      resources {
        cpu    = 50
        memory = 50
      }
    }
  }
}
|
Loading…
Reference in New Issue