diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 000000000..4649a508c --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 7f8c0cf9d..62686975d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6608,7 +6608,7 @@ func (tg *TaskGroup) Validate(j *Job) error { canaries = tg.Update.Canary } for name, volReq := range tg.Volumes { - if err := volReq.Validate(tg.Count, canaries); err != nil { + if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil { mErr.Errors = append(mErr.Errors, fmt.Errorf( "Task group volume validation for %s failed: %v", name, err)) } diff --git a/nomad/structs/volume_test.go b/nomad/structs/volume_test.go index fa86ec2fb..11415963b 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volume_test.go @@ -30,6 +30,7 @@ func TestVolumeRequest_Validate(t *testing.T) { "host volumes cannot have an access mode", "host volumes cannot have an attachment mode", "host volumes cannot have mount options", + "volume cannot be per_alloc for system or sysbatch jobs", "volume cannot be per_alloc when canaries are in use", }, canariesCount: 1, @@ -69,8 +70,11 @@ func TestVolumeRequest_Validate(t *testing.T) { }, }, { - name: "CSI volume per-alloc with canaries", - expected: []string{"volume cannot be per_alloc when canaries are in use"}, + name: "CSI volume per-alloc with canaries", + expected: []string{ + "volume cannot be per_alloc for system or sysbatch jobs", + "volume cannot be per_alloc when canaries are in use", + }, canariesCount: 1, req: &VolumeRequest{ Type: VolumeTypeCSI, @@ -81,7 +85,7 @@ func TestVolumeRequest_Validate(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount) + err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { require.Contains(t, err.Error(), expected) } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 563d7b1cf..d1ba01659 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -102,7 +102,7 @@ type VolumeRequest struct { PerAlloc bool } -func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { +func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error { if !(v.Type == VolumeTypeHost || v.Type == VolumeTypeCSI) { return fmt.Errorf("volume has unrecognized type %s", v.Type) @@ -116,6 +116,14 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { if v.Source == "" { addErr("volume has an empty source") } + if v.PerAlloc { + if jobType == JobTypeSystem || jobType == JobTypeSysBatch { + addErr("volume cannot be per_alloc for system or sysbatch jobs") + } + if canaries > 0 { + addErr("volume cannot be per_alloc when canaries are in use") + } + } switch v.Type { @@ -129,9 +137,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { if v.MountOptions != nil { addErr("host volumes cannot have mount options") } - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") - } case VolumeTypeCSI: @@ -170,11 +175,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { case CSIVolumeAccessModeMultiNodeMultiWriter: // note: we intentionally allow read-only mount of this mode } - - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") - } - } return mErr.ErrorOrNil() diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 9017e707d..1bace41c7 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -92,6 +93,15 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Ensure allocations have unique names derived from Job.ID + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Ensure different ports were used. used := make(map[int]map[string]struct{}) for _, alloc := range out { diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index fac543699..49ddbfc57 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all sysbatch allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index 05a4ec6a2..40f83ceb9 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -66,6 +67,15 @@ func TestSystemSched_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all system allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/website/content/docs/job-specification/volume.mdx b/website/content/docs/job-specification/volume.mdx index df09b96f8..58ec2bd35 100644 --- a/website/content/docs/job-specification/volume.mdx +++ b/website/content/docs/job-specification/volume.mdx @@ -82,6 +82,9 @@ the [volume_mount][volume_mount] block in the `task` configuration. = true`, the allocation named `myjob.mygroup.mytask[0]` will require a volume ID `myvolume[0]`. + The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs + that use canaries. + The following fields are only valid for volumes with `type = "csi"`: - `access_mode` `(string: )` - Defines whether a volume should be diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index 612e0d01c..4263657ee 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -1,29 +1,29 @@ ### Job-related variables -| Variable | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | -| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | -| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | -| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | -| `NOMAD_ALLOC_ID` | Allocation ID of the task | -| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task | -| `NOMAD_ALLOC_NAME` | Allocation name of the task | -| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. | -| `NOMAD_TASK_NAME` | Task's name | -| `NOMAD_GROUP_NAME` | Group's name | -| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | -| `NOMAD_JOB_NAME` | Job's name | -| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | -| `NOMAD_DC` | Datacenter in which the allocation is running | -| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) | -| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | -| `NOMAD_REGION` | Region in which the allocation is running | -| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | -| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | +| Variable | Description | +|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | +| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | +| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | +| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | +| `NOMAD_ALLOC_ID` | Allocation ID of the task | +| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task | +| `NOMAD_ALLOC_NAME` | Allocation name of the task. This is derived from the job name, task group name, and allocation index. | +| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). For system jobs and sysbatch jobs, this value will always be 0. The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. | +| `NOMAD_TASK_NAME` | Task's name | +| `NOMAD_GROUP_NAME` | Group's name | +| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | +| `NOMAD_JOB_NAME` | Job's name | +| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | +| `NOMAD_DC` | Datacenter in which the allocation is running | +| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) | +| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | +| `NOMAD_REGION` | Region in which the allocation is running | +| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | +| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | ### Network-related Variables