CSI: ensure initial unpublish state is checkpointed (#14675)

A test flake revealed a bug in the CSI unpublish workflow: an unpublish request
from a client that has already completed the node-unpublish step does not get
its claim checkpointed if the controller-unpublish step fails. This delays
releasing the volume claim until the next GC.
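
To make the ordering concrete, here is a minimal Go sketch of the checkpoint-first flow described above. The names (`claimState`, `volume`, `checkpoint`, `unpublish`) are simplified stand-ins for illustration, not Nomad's actual types or the real `CSIVolume.Unpublish` handler.

```go
package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for illustration only; these are not Nomad's real
// types, claim states, or RPC signatures.
type claimState string

const (
	claimUnpublishing claimState = "unpublishing"
	claimNodeDetached claimState = "node-detached"
)

type volume struct {
	pastClaims map[string]claimState // persisted claim progress, keyed by alloc ID
}

// checkpoint stands in for a raft write that persists claim progress.
func checkpoint(vol *volume, allocID string, state claimState) error {
	vol.pastClaims[allocID] = state
	return nil
}

// unpublish sketches the ordering the fix enforces: persist the initial
// past-claim state before attempting controller unpublish, so a failure in
// the controller step doesn't throw away the node-unpublish work the client
// already did.
func unpublish(vol *volume, allocID string, nodeAlreadyDetached bool, controllerUnpublish func() error) error {
	state := claimUnpublishing
	if nodeAlreadyDetached {
		state = claimNodeDetached // the client already did the node-unpublish step
	}
	// checkpoint first: even if the controller step below fails, this progress
	// is durable and the claim can be released promptly on retry instead of
	// waiting for the next GC pass.
	if err := checkpoint(vol, allocID, state); err != nil {
		return err
	}
	return controllerUnpublish()
}

func main() {
	vol := &volume{pastClaims: map[string]claimState{}}
	err := unpublish(vol, "alloc-1", true, func() error {
		return errors.New("controller unpublish failed")
	})
	fmt.Println("unpublish error:", err)
	fmt.Println("checkpointed state:", vol.pastClaims["alloc-1"])
}
```

Because the past-claim state is written before the controller call, a retry can pick up where the client left off instead of waiting for the next GC pass.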

This changeset also ensures we're using a new snapshot after each write to raft,
and fixes two timing issues in tests: the volume watcher could unpublish before
the unpublish RPC was sent, and we didn't wait long enough in
resource-restricted environments like GHA.
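
The snapshot change matters because a state snapshot is a point-in-time view: reads through a snapshot taken before a checkpoint will not see the claim state that checkpoint wrote. Here is a toy sketch of that property, using a hypothetical `store`/`snapshot` pair rather than Nomad's memdb-backed state store.

```go
package main

import "fmt"

// Toy stand-ins for a state store and its point-in-time snapshots; these are
// illustrative only, not Nomad's actual state store API.
type store struct{ data map[string]string }

type snapshot struct{ data map[string]string }

// Snapshot returns a copy of the current state; later writes are not visible
// through it, which is the property the changeset has to account for.
func (s *store) Snapshot() *snapshot {
	copied := make(map[string]string, len(s.data))
	for k, v := range s.data {
		copied[k] = v
	}
	return &snapshot{data: copied}
}

// Write stands in for a checkpoint applied through raft.
func (s *store) Write(k, v string) { s.data[k] = v }

func main() {
	st := &store{data: map[string]string{}}

	stale := st.Snapshot()             // snapshot taken before the checkpoint
	st.Write("claim", "node-detached") // checkpoint the past-claim state

	fmt.Printf("stale snapshot sees: %q\n", stale.data["claim"]) // prints "": the write isn't visible
	fresh := st.Snapshot() // take a new snapshot after the write
	fmt.Printf("fresh snapshot sees: %q\n", fresh.data["claim"])
}
```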
Tim Gross 2022-09-27 08:43:45 -04:00 committed by GitHub
parent fb8739d926
commit 87681fca68
4 changed files with 41 additions and 10 deletions

.changelog/14675.txt

@@ -0,0 +1,3 @@
```release-note:bug
csi: Fixed a bug where a volume that was successfully unmounted by the client but then failed controller unpublishing would not be marked free until garbage collection ran.
```

@@ -2451,7 +2451,7 @@ func TestCoreScheduler_CSIBadState_ClaimGC(t *testing.T) {
}
}
return true
-}, time.Second*1, 10*time.Millisecond, "invalid claims should be marked for GC")
+}, time.Second*5, 10*time.Millisecond, "invalid claims should be marked for GC")
}

@@ -647,6 +647,16 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st
claim := args.Claim
+// we need to checkpoint when we first get the claim to ensure we've set the
+// initial "past claim" state, otherwise the work done by a client that
+// unpublishes (skipping the node unpublish because it has already done that
+// work) fails to get written if the controller unpublish fails.
+vol = vol.Copy()
+err = v.checkpointClaim(vol, claim)
+if err != nil {
+return err
+}
// previous checkpoints may have set the past claim state already.
// in practice we should never see CSIVolumeClaimStateControllerDetached
// but having an option for the state makes it easy to add a checkpoint
@@ -693,14 +703,18 @@ RELEASE_CLAIM:
func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
v.logger.Trace("node unpublish", "vol", vol.ID)
-store := v.srv.fsm.State()
+// We need a new snapshot after each checkpoint
+snap, err := v.srv.fsm.State().Snapshot()
+if err != nil {
+return err
+}
// If the node has been GC'd or is down, we can't send it a node
// unpublish. We need to assume the node has unpublished at its
// end. If it hasn't, any controller unpublish will potentially
// hang or error and need to be retried.
if claim.NodeID != "" {
-node, err := store.NodeByID(memdb.NewWatchSet(), claim.NodeID)
+node, err := snap.NodeByID(memdb.NewWatchSet(), claim.NodeID)
if err != nil {
return err
}
@@ -723,7 +737,7 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C
// The RPC sent from the 'nomad node detach' command or GC won't have an
// allocation ID set so we try to unpublish every terminal or invalid
// alloc on the node, all of which will be in PastClaims after denormalizing
-vol, err := store.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
+vol, err = snap.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
if err != nil {
return err
}
@@ -793,10 +807,15 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
return nil
}
-state := v.srv.fsm.State()
+// We need a new snapshot after each checkpoint
+snap, err := v.srv.fsm.State().Snapshot()
+if err != nil {
+return err
+}
ws := memdb.NewWatchSet()
-plugin, err := state.CSIPluginByID(ws, vol.PluginID)
+plugin, err := snap.CSIPluginByID(ws, vol.PluginID)
if err != nil {
return fmt.Errorf("could not query plugin: %v", err)
} else if plugin == nil {
@@ -808,7 +827,7 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
return nil
}
-vol, err = state.CSIVolumeDenormalize(ws, vol)
+vol, err = snap.CSIVolumeDenormalize(ws, vol)
if err != nil {
return err
}

@@ -575,7 +575,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
// setup: create an alloc that will claim our volume
alloc := mock.BatchAlloc()
alloc.NodeID = tc.nodeID
-alloc.ClientStatus = structs.AllocClientStatusFailed
+alloc.ClientStatus = structs.AllocClientStatusRunning
otherAlloc := mock.BatchAlloc()
otherAlloc.NodeID = tc.otherNodeID
@@ -585,7 +585,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index,
[]*structs.Allocation{alloc, otherAlloc}))
-// setup: claim the volume for our alloc
+// setup: claim the volume for our to-be-failed alloc
claim := &structs.CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: tc.nodeID,
@@ -623,10 +623,19 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
},
}
+alloc = alloc.Copy()
+alloc.ClientStatus = structs.AllocClientStatusFailed
+index++
+must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index,
+[]*structs.Allocation{alloc}))
err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Unpublish", req,
&structs.CSIVolumeUnpublishResponse{})
-vol, volErr := state.CSIVolumeByID(nil, ns, volID)
+snap, snapErr := state.Snapshot()
+must.NoError(t, snapErr)
+vol, volErr := snap.CSIVolumeByID(nil, ns, volID)
must.NoError(t, volErr)
must.NotNil(t, vol)