client: set environment variable indicating set of reserved cpu cores

This PR injects the 'NOMAD_CPU_CORES' environment variable into
tasks that have been allocated reserved cpu cores. The value uses
normal cpuset notation, as found in cpuset.cpus cgroup interface files.

Note this value is not necessarily the same as the content of the actual
cpuset.cpus interface file, which will also include shared cpu cores when
using cgroups v2. This variable is a workaround for users who used to be
able to read the reserved cgroup cpuset file, but lose the information
about distinct reserved cores when using cgroups v2.

Side discussion in: https://github.com/hashicorp/nomad/issues/12374
This commit is contained in:
Seth Hoenig 2022-04-07 08:52:07 -05:00
parent 1724765096
commit 0870aa31dc
3 changed files with 26 additions and 2 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/hashicorp/nomad/helper"
hargs "github.com/hashicorp/nomad/helper/args"
"github.com/hashicorp/nomad/lib/cpuset"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/zclconf/go-cty/cty"
@ -40,6 +41,9 @@ const (
// CpuLimit is the environment variable with the task's CPU limit in MHz.
CpuLimit = "NOMAD_CPU_LIMIT"
// CpuCores is the environment variable for passing the task's reserved cpu cores
CpuCores = "NOMAD_CPU_CORES"
// AllocID is the environment variable for passing the allocation ID.
AllocID = "NOMAD_ALLOC_ID"
@ -397,6 +401,7 @@ type Builder struct {
// clientTaskSecretsDir is the secrets dir from the client's perspective; eg <client_task_root>/secrets
clientTaskSecretsDir string
cpuCores string
cpuLimit int64
memLimit int64
memMaxLimit int64
@ -493,6 +498,9 @@ func (b *Builder) buildEnv(allocDir, localDir, secretsDir string,
if b.cpuLimit != 0 {
envMap[CpuLimit] = strconv.FormatInt(b.cpuLimit, 10)
}
if b.cpuCores != "" {
envMap[CpuCores] = b.cpuCores
}
// Add the task metadata
if b.allocId != "" {
@ -742,6 +750,7 @@ func (b *Builder) setAlloc(alloc *structs.Allocation) *Builder {
// Populate task resources
if tr, ok := alloc.AllocatedResources.Tasks[b.taskName]; ok {
b.cpuLimit = tr.Cpu.CpuShares
b.cpuCores = cpuset.New(tr.Cpu.ReservedCores...).String()
b.memLimit = tr.Memory.MemoryMB
b.memMaxLimit = tr.Memory.MemoryMaxMB
@ -788,7 +797,7 @@ func (b *Builder) setAlloc(alloc *structs.Allocation) *Builder {
}
}
upstreams := []structs.ConsulUpstream{}
var upstreams []structs.ConsulUpstream
for _, svc := range tg.Services {
if svc.Connect.HasSidecar() && svc.Connect.SidecarService.HasUpstreams() {
upstreams = append(upstreams, svc.Connect.SidecarService.Proxy.Upstreams...)

View File

@ -159,7 +159,10 @@ func TestEnvironment_AsList(t *testing.T) {
a := mock.Alloc()
a.Job.ParentID = fmt.Sprintf("mock-parent-service-%s", uuid.Generate())
a.AllocatedResources.Tasks["web"] = &structs.AllocatedTaskResources{
Cpu: structs.AllocatedCpuResources{CpuShares: 500},
Cpu: structs.AllocatedCpuResources{
CpuShares: 500,
ReservedCores: []uint16{0, 5, 6, 7},
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 256,
MemoryMaxMB: 512,
@ -215,6 +218,7 @@ func TestEnvironment_AsList(t *testing.T) {
"NOMAD_PORT_ssh_other=1234",
"NOMAD_PORT_ssh_ssh=22",
"NOMAD_CPU_LIMIT=500",
"NOMAD_CPU_CORES=0,5-7",
"NOMAD_DC=dc1",
"NOMAD_NAMESPACE=not-default",
"NOMAD_REGION=global",
@ -260,6 +264,7 @@ func TestEnvironment_AllValues(t *testing.T) {
MBits: 50,
DynamicPorts: []structs.Port{{Label: "http", Value: 80}},
}
a.AllocatedResources.Tasks["web"].Cpu.ReservedCores = []uint16{0, 5, 6, 7}
a.AllocatedResources.Tasks["ssh"] = &structs.AllocatedTaskResources{
Networks: []*structs.NetworkResource{
{
@ -378,6 +383,7 @@ func TestEnvironment_AllValues(t *testing.T) {
"NOMAD_PORT_ssh_other": "1234",
"NOMAD_PORT_ssh_ssh": "22",
"NOMAD_CPU_LIMIT": "500",
"NOMAD_CPU_CORES": "0,5-7",
"NOMAD_DC": "dc1",
"NOMAD_PARENT_CGROUP": "abc.slice",
"NOMAD_NAMESPACE": "default",

View File

@ -54,6 +54,15 @@
</td>
<td>CPU limit in MHz for the task</td>
</tr>
<tr>
<td>
<code>NOMAD_CPU_CORES</code>
</td>
<td>
The specific CPU cores reserved for the task in cpuset list notation.
Omitted if the task does not request cpu cores. E.g. <code>0-2,7,12-14</code>
</td>
</tr>
<tr>
<td>
<code>NOMAD_ALLOC_ID</code>