Allow per_alloc to be used with host volumes (#15780)

Disallowing `per_alloc` for host volumes makes life much harder for Nomad users in some cases. When we rely on `NOMAD_ALLOC_INDEX` for any configuration that needs to be reused across restarts, we need to make sure allocation placement is consistent. With CSI volumes we can use the `per_alloc` feature, but it is explicitly disabled for host volumes.
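
With `per_alloc`, a volume's `source` is resolved per allocation by appending the `[n]` index suffix derived from the allocation name. A minimal sketch of that derivation, mirroring the behavior of `structs.AllocSuffix` used throughout this diff (illustrative re-implementation, not the Nomad source):

```go
package main

import (
	"fmt"
	"strings"
)

// allocSuffix mirrors structs.AllocSuffix: it extracts the trailing
// "[n]" index from an allocation name such as "myjob.mygroup[3]".
// Illustrative re-implementation, not the actual Nomad source.
func allocSuffix(allocName string) string {
	idx := strings.LastIndex(allocName, "[")
	if idx == -1 {
		return ""
	}
	return allocName[idx:]
}

func main() {
	// With per_alloc = true, a volume source "data" resolves to "data[3]"
	// for the allocation named "myjob.mygroup[3]".
	fmt.Println("data" + allocSuffix("myjob.mygroup[3]")) // data[3]
}
```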

Ensure host volumes understand the concept of per_alloc
Yorick Gersie 2023-01-26 15:14:47 +01:00 committed by GitHub
parent f4d6efe69f
commit 2a5c423ae0
11 changed files with 66 additions and 28 deletions

.changelog/15780.txt

@@ -0,0 +1,3 @@
+```release-note:improvement
+volumes: Allow `per_alloc` to be used with host_volumes
+```


@@ -64,6 +64,9 @@ type TaskPrestartRequest struct {
 	// TaskEnv is the task's environment
 	TaskEnv *taskenv.TaskEnv
+
+	// Alloc is the current version of the allocation
+	Alloc *structs.Allocation
 }

 type TaskPrestartResponse struct {
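
Carrying the allocation on the prestart request is what lets hooks resolve per-allocation values. A minimal sketch of a hook consuming the new field (the hook itself is hypothetical, and it assumes the `TaskPrestartHook` interface and import paths match the tree this diff applies to):

```go
package example

import (
	"context"

	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/nomad/structs"
)

// exampleHook is a hypothetical prestart hook using the new Alloc field.
type exampleHook struct{}

func (*exampleHook) Name() string { return "example" }

func (*exampleHook) Prestart(_ context.Context, req *interfaces.TaskPrestartRequest, _ *interfaces.TaskPrestartResponse) error {
	// req.Alloc.Name is e.g. "myjob.mygroup[3]"; appending
	// structs.AllocSuffix(req.Alloc.Name) to a volume source yields the
	// per-allocation name, e.g. "myvolume[3]".
	suffix := structs.AllocSuffix(req.Alloc.Name)
	_ = suffix
	return nil
}
```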


@@ -208,6 +208,8 @@ func (tr *TaskRunner) prestart() error {
 	joinedCtx, joinedCancel := joincontext.Join(tr.killCtx, tr.shutdownCtx)
 	defer joinedCancel()
+
+	alloc := tr.Alloc()
 	for _, hook := range tr.runnerHooks {
 		pre, ok := hook.(interfaces.TaskPrestartHook)
 		if !ok {
@@ -218,6 +220,7 @@ func (tr *TaskRunner) prestart() error {
 		// Build the request
 		req := interfaces.TaskPrestartRequest{
+			Alloc:   alloc,
 			Task:    tr.Task(),
 			TaskDir: tr.taskDir,
 			TaskEnv: tr.envBuilder.Build(),


@@ -32,7 +32,7 @@ func (*volumeHook) Name() string {
 	return "volumes"
 }

-func validateHostVolumes(requestedByAlias map[string]*structs.VolumeRequest, clientVolumesByName map[string]*structs.ClientHostVolumeConfig) error {
+func validateHostVolumes(requestedByAlias map[string]*structs.VolumeRequest, clientVolumesByName map[string]*structs.ClientHostVolumeConfig, allocName string) error {
 	var result error

 	for _, req := range requestedByAlias {
@@ -42,9 +42,14 @@ func validateHostVolumes(requestedByAlias map[string]*structs.VolumeRequest, cli
 			continue
 		}

-		_, ok := clientVolumesByName[req.Source]
+		source := req.Source
+		if req.PerAlloc {
+			source = source + structs.AllocSuffix(allocName)
+		}
+
+		_, ok := clientVolumesByName[source]
 		if !ok {
-			result = multierror.Append(result, fmt.Errorf("missing %s", req.Source))
+			result = multierror.Append(result, fmt.Errorf("missing %s", source))
 		}
 	}
@@ -54,7 +59,7 @@ func validateHostVolumes(requestedByAlias map[string]*structs.VolumeRequest, cli
 // hostVolumeMountConfigurations takes the users requested volume mounts,
 // volumes, and the client host volume configuration and converts them into a
 // format that can be used by drivers.
-func (h *volumeHook) hostVolumeMountConfigurations(taskMounts []*structs.VolumeMount, taskVolumesByAlias map[string]*structs.VolumeRequest, clientVolumesByName map[string]*structs.ClientHostVolumeConfig) ([]*drivers.MountConfig, error) {
+func (h *volumeHook) hostVolumeMountConfigurations(taskMounts []*structs.VolumeMount, taskVolumesByAlias map[string]*structs.VolumeRequest, clientVolumesByName map[string]*structs.ClientHostVolumeConfig, allocName string) ([]*drivers.MountConfig, error) {
 	var mounts []*drivers.MountConfig
 	for _, m := range taskMounts {
 		req, ok := taskVolumesByAlias[m.Volume]
@@ -71,11 +76,15 @@ func (h *volumeHook) hostVolumeMountConfigurations(taskMounts []*structs.VolumeM
 			continue
 		}

-		hostVolume, ok := clientVolumesByName[req.Source]
+		source := req.Source
+		if req.PerAlloc {
+			source = source + structs.AllocSuffix(allocName)
+		}
+
+		hostVolume, ok := clientVolumesByName[source]
 		if !ok {
 			// Should never happen, but unless the client volumes were mutated during
 			// the execution of this hook.
-			return nil, fmt.Errorf("No host volume named: %s", req.Source)
+			return nil, fmt.Errorf("no host volume named: %s", source)
 		}

 		mcfg := &drivers.MountConfig{
@@ -110,12 +119,12 @@ func (h *volumeHook) prepareHostVolumes(req *interfaces.TaskPrestartRequest, vol
 	// Always validate volumes to ensure that we do not allow volumes to be used
 	// if a host is restarted and loses the host volume configuration.
-	if err := validateHostVolumes(volumes, hostVolumes); err != nil {
+	if err := validateHostVolumes(volumes, hostVolumes, req.Alloc.Name); err != nil {
 		h.logger.Error("Requested Host Volume does not exist", "existing", hostVolumes, "requested", volumes)
 		return nil, fmt.Errorf("host volume validation error: %v", err)
 	}

-	hostVolumeMounts, err := h.hostVolumeMountConfigurations(req.Task.VolumeMounts, volumes, hostVolumes)
+	hostVolumeMounts, err := h.hostVolumeMountConfigurations(req.Task.VolumeMounts, volumes, hostVolumes, req.Alloc.Name)
 	if err != nil {
 		h.logger.Error("Failed to generate host volume mounts", "error", err)
 		return nil, err
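
To make the lookup above concrete, here is a self-contained sketch of the resolution the hook now performs (volume names and host paths are invented):

```go
package main

import "fmt"

func main() {
	// Hypothetical host volumes fingerprinted on a client (name -> host path).
	clientVolumes := map[string]string{
		"data[0]": "/srv/data0",
		"data[1]": "/srv/data1",
	}

	// Volume "data" requested with per_alloc = true by allocation index 1.
	source, allocIndex := "data", 1
	resolved := fmt.Sprintf("%s[%d]", source, allocIndex)

	if path, ok := clientVolumes[resolved]; ok {
		fmt.Println(resolved, "->", path) // data[1] -> /srv/data1
	} else {
		// Mirrors the "missing <source>" validation error above.
		fmt.Println("missing", resolved)
	}
}
```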


@@ -30,8 +30,9 @@ func TestVolumeRequest_Validate(t *testing.T) {
 			"host volumes cannot have an access mode",
 			"host volumes cannot have an attachment mode",
 			"host volumes cannot have mount options",
-			"host volumes do not support per_alloc",
+			"volume cannot be per_alloc when canaries are in use",
 		},
+		canariesCount: 1,
 		req: &VolumeRequest{
 			Type:     VolumeTypeHost,
 			ReadOnly: false,
@@ -79,7 +80,6 @@ func TestVolumeRequest_Validate(t *testing.T) {
 	}

 	for _, tc := range testCases {
-		tc = tc
 		t.Run(tc.name, func(t *testing.T) {
 			err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount)
 			for _, expected := range tc.expected {


@@ -129,8 +129,8 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
 		if v.MountOptions != nil {
 			addErr("host volumes cannot have mount options")
 		}
-		if v.PerAlloc {
-			addErr("host volumes do not support per_alloc")
+		if v.PerAlloc && canaries > 0 {
+			addErr("volume cannot be per_alloc when canaries are in use")
 		}

 	case VolumeTypeCSI:

@@ -149,7 +149,7 @@ func NewHostVolumeChecker(ctx Context) *HostVolumeChecker {
 }

 // SetVolumes takes the volumes required by a task group and updates the checker.
-func (h *HostVolumeChecker) SetVolumes(volumes map[string]*structs.VolumeRequest) {
+func (h *HostVolumeChecker) SetVolumes(allocName string, volumes map[string]*structs.VolumeRequest) {
 	lookupMap := make(map[string][]*structs.VolumeRequest)
 	// Convert the map from map[DesiredName]Request to map[Source][]Request to improve
 	// lookup performance. Also filter non-host volumes.
@@ -158,8 +158,15 @@ func (h *HostVolumeChecker) SetVolumes(volumes map[string]*structs.VolumeRequest
 			continue
 		}

+		if req.PerAlloc {
+			// provide a unique volume source per allocation
+			copied := req.Copy()
+			copied.Source = copied.Source + structs.AllocSuffix(allocName)
+			lookupMap[copied.Source] = append(lookupMap[copied.Source], copied)
+		} else {
 		lookupMap[req.Source] = append(lookupMap[req.Source], req)
+		}
 	}
 	h.volumes = lookupMap
 }
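
Copying the request before rewriting it matters here: the request comes from the shared job structure, and mutating `Source` in place would leak one allocation's suffix into every other placement. A stripped-down sketch of the lookup map this builds (simplified types, invented volume names):

```go
package main

import "fmt"

// volumeRequest is a simplified stand-in for structs.VolumeRequest.
type volumeRequest struct {
	Source   string
	PerAlloc bool
}

func main() {
	allocSuffix := "[0]" // structs.AllocSuffix("myjob.mygroup[0]")

	volumes := map[string]*volumeRequest{
		"shared": {Source: "foo"},
		"unique": {Source: "unique-volume", PerAlloc: true},
	}

	lookup := make(map[string][]*volumeRequest)
	for _, req := range volumes {
		if req.PerAlloc {
			// Copy before rewriting so the job's request is not mutated.
			copied := *req
			copied.Source += allocSuffix
			lookup[copied.Source] = append(lookup[copied.Source], &copied)
			continue
		}
		lookup[req.Source] = append(lookup[req.Source], req)
	}

	for source := range lookup {
		fmt.Println(source) // "foo" and "unique-volume[0]" (map order varies)
	}
}
```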


@@ -105,6 +105,7 @@ func TestHostVolumeChecker(t *testing.T) {
 	nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{
 		"foo": {},
 		"bar": {},
+		"unique-volume[0]": {},
 	}
 	nodes[3].HostVolumes = map[string]*structs.ClientHostVolumeConfig{
 		"foo": {},
@@ -130,6 +131,11 @@ func TestHostVolumeChecker(t *testing.T) {
 			Type:   "nothost",
 			Source: "baz",
 		},
+		"unique": {
+			Type:     "host",
+			Source:   "unique-volume[0]",
+			PerAlloc: true,
+		},
 	}

 	checker := NewHostVolumeChecker(ctx)
@@ -165,8 +171,11 @@ func TestHostVolumeChecker(t *testing.T) {
 		},
 	}

+	alloc := mock.Alloc()
+	alloc.NodeID = nodes[2].ID
+
 	for i, c := range cases {
-		checker.SetVolumes(c.RequestedVolumes)
+		checker.SetVolumes(alloc.Name, c.RequestedVolumes)
 		if act := checker.Feasible(c.Node); act != c.Result {
 			t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result)
 		}
@@ -235,8 +244,12 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) {
 			Result: true,
 		},
 	}

+	alloc := mock.Alloc()
+	alloc.NodeID = nodes[1].ID
+
 	for i, c := range cases {
-		checker.SetVolumes(c.RequestedVolumes)
+		checker.SetVolumes(alloc.Name, c.RequestedVolumes)
 		if act := checker.Feasible(c.Node); act != c.Result {
 			t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result)
 		}


@@ -144,7 +144,7 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra
 	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
 	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
 	s.taskGroupDevices.SetTaskGroup(tg)
-	s.taskGroupHostVolumes.SetVolumes(tg.Volumes)
+	s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes)
 	s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes)
 	if len(tg.Networks) > 0 {
 		s.taskGroupNetwork.SetNetwork(tg.Networks[0])
@@ -321,7 +321,7 @@ func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ran
 	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
 	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
 	s.taskGroupDevices.SetTaskGroup(tg)
-	s.taskGroupHostVolumes.SetVolumes(tg.Volumes)
+	s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes)
 	s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes)
 	if len(tg.Networks) > 0 {
 		s.taskGroupNetwork.SetNetwork(tg.Networks[0])


@@ -105,7 +105,7 @@ a future release.
   stopping any previous allocations. Once the operator determines the canaries
   are healthy, they can be promoted which unblocks a rolling update of the
   remaining allocations at a rate of `max_parallel`. Canary deployments cannot
-  be used with CSI volumes when `per_alloc = true`.
+  be used with volumes when `per_alloc = true`.

 - `stagger` `(string: "30s")` - Specifies the delay between each set of
   [`max_parallel`](#max_parallel) updates when updating system jobs. This


@@ -75,6 +75,13 @@ the [volume_mount][volume_mount] stanza in the `task` configuration.
   used for validating `host_volume` ACLs and for scheduling when a
   matching `host_volume` requires `read_only` usage.

+- `per_alloc` `(bool: false)` - Specifies that the `source` of the volume
+  should have the suffix `[n]`, where `n` is the allocation index. This allows
+  mounting a unique volume per allocation, so long as the volume's source is
+  named appropriately. For example, with the source `myvolume` and `per_alloc
+  = true`, the allocation named `myjob.mygroup.mytask[0]` will require a
+  volume ID `myvolume[0]`.
+
 The following fields are only valid for volumes with `type = "csi"`:

 - `access_mode` `(string: <required>)` - Defines whether a volume should be
@@ -92,13 +99,6 @@ The following fields are only valid for volumes with `type = "csi"`:
   storage providers will support `"block-device"`, which will mount the volume
   with the CSI block device API within the container.

-- `per_alloc` `(bool: false)` - Specifies that the `source` of the volume
-  should have the suffix `[n]`, where `n` is the allocation index. This allows
-  mounting a unique volume per allocation, so long as the volume's source is
-  named appropriately. For example, with the source `myvolume` and `per_alloc
-  = true`, the allocation named `myjob.mygroup.mytask[0]` will require a
-  volume ID `myvolume[0]`.
-
 - `mount_options` - Options for mounting CSI volumes that have the
   `file-system` [attachment mode]. These options override the `mount_options`
   field from [volume registration]. Consult the documentation for your storage