CSI: enforce usage at claim time (#12112)
* Remove redundant schedulable check in `FreeWriteClaims`. If a volume has been created but not yet claimed, its capabilities will be checked in `WriteSchedulable` at both scheduling time and claim time. We don't need to also check them in the `FreeWriteClaims` method. * Enforce maximum volume claims for writers. When the scheduler checks feasibility for CSI volumes, the check is fairly loose: earlier versions of the same job are not counted as active claims. This allows the scheduler to place new allocations for the new version of a job, under the assumption that we'll replace the existing allocations and their volume claims. But when the alloc runner claims the volume, we need to enforce the active claims even if they're for allocations of an earlier version of the job. Otherwise we'll try to mount a volume that's currently being unmounted, and this will cause replacement allocations to frequently fail. * Enforce single-node reader check for read-only volumes. When the alloc runner makes a claim for a read-only volume, we only check that the volume is potentially schedulable and not that it actually has free read claims.
This commit is contained in:
parent
42b338308f
commit
cfe3117af8
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
csi: Fixed a bug where the maximum number of volume claims was incorrectly enforced when an allocation claims a volume
|
||||
```
|
|
@ -243,9 +243,11 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
// Create an initial volume claim request; we expect it to fail
|
||||
// because there's no such volume yet.
|
||||
claimReq := &structs.CSIVolumeClaimRequest{
|
||||
VolumeID: id0,
|
||||
AllocationID: alloc.ID,
|
||||
Claim: structs.CSIVolumeClaimWrite,
|
||||
VolumeID: id0,
|
||||
AllocationID: alloc.ID,
|
||||
Claim: structs.CSIVolumeClaimWrite,
|
||||
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
|
||||
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
|
||||
WriteRequest: structs.WriteRequest{
|
||||
Region: "global",
|
||||
Namespace: structs.DefaultNamespace,
|
||||
|
|
|
@ -411,6 +411,26 @@ func (v *CSIVolume) WriteSchedulable() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// HasFreeReadClaims determines if there are any free read claims available
|
||||
func (v *CSIVolume) HasFreeReadClaims() bool {
|
||||
switch v.AccessMode {
|
||||
case CSIVolumeAccessModeSingleNodeReader:
|
||||
return len(v.ReadClaims) == 0
|
||||
case CSIVolumeAccessModeSingleNodeWriter:
|
||||
return len(v.ReadClaims) == 0 && len(v.WriteClaims) == 0
|
||||
case CSIVolumeAccessModeUnknown:
|
||||
// This volume was created but not yet claimed, so its
|
||||
// capabilities have been checked in ReadSchedulable
|
||||
return true
|
||||
default:
|
||||
// For multi-node AccessModes, the CSI spec doesn't allow for
|
||||
// setting a max number of readers we track node resource
|
||||
// exhaustion through v.ResourceExhausted which is checked in
|
||||
// ReadSchedulable
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// HasFreeWriteClaims determines if there are any free write claims available
|
||||
func (v *CSIVolume) HasFreeWriteClaims() bool {
|
||||
switch v.AccessMode {
|
||||
|
@ -422,24 +442,13 @@ func (v *CSIVolume) HasFreeWriteClaims() bool {
|
|||
// which is checked in WriteSchedulable
|
||||
return true
|
||||
case CSIVolumeAccessModeUnknown:
|
||||
// this volume was created but not yet claimed, so we check what it's
|
||||
// capable of, not what it's been assigned
|
||||
if len(v.RequestedCapabilities) == 0 {
|
||||
// COMPAT: a volume that was registered before 1.1.0 and has not
|
||||
// had a change in claims could have no requested caps. It will
|
||||
// get corrected on the first claim.
|
||||
return true
|
||||
}
|
||||
for _, cap := range v.RequestedCapabilities {
|
||||
switch cap.AccessMode {
|
||||
case CSIVolumeAccessModeSingleNodeWriter, CSIVolumeAccessModeMultiNodeSingleWriter:
|
||||
return len(v.WriteClaims) == 0
|
||||
case CSIVolumeAccessModeMultiNodeMultiWriter:
|
||||
return true
|
||||
}
|
||||
}
|
||||
// This volume was created but not yet claimed, so its
|
||||
// capabilities have been checked in WriteSchedulable
|
||||
return true
|
||||
default:
|
||||
// Reader modes never have free write claims
|
||||
return false
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// InUse tests whether any allocations are actively using the volume
|
||||
|
@ -538,6 +547,10 @@ func (v *CSIVolume) claimRead(claim *CSIVolumeClaim, alloc *Allocation) error {
|
|||
return ErrCSIVolumeUnschedulable
|
||||
}
|
||||
|
||||
if !v.HasFreeReadClaims() {
|
||||
return ErrCSIVolumeMaxClaims
|
||||
}
|
||||
|
||||
// Allocations are copy on write, so we want to keep the id but don't need the
|
||||
// pointer. We'll get it from the db in denormalize.
|
||||
v.ReadAllocs[claim.AllocationID] = nil
|
||||
|
@ -565,12 +578,7 @@ func (v *CSIVolume) claimWrite(claim *CSIVolumeClaim, alloc *Allocation) error {
|
|||
}
|
||||
|
||||
if !v.HasFreeWriteClaims() {
|
||||
// Check the blocking allocations to see if they belong to this job
|
||||
for _, a := range v.WriteAllocs {
|
||||
if a != nil && (a.Namespace != alloc.Namespace || a.JobID != alloc.JobID) {
|
||||
return ErrCSIVolumeMaxClaims
|
||||
}
|
||||
}
|
||||
return ErrCSIVolumeMaxClaims
|
||||
}
|
||||
|
||||
// Allocations are copy on write, so we want to keep the id but don't need the
|
||||
|
|
Loading…
Reference in New Issue