csi: fix handling of garbage collected node in node unpublish (#12350)

When a node is garbage collected, we assume that the volume is no
longer attached to it and ignore the `ErrUnknownNode` error. However, we
used `errors.Is` to check for the wrapped error, and RPC flattens
errors during serialization, so the wrapping is lost. The result is an
error check that works in automated testing but not in real clusters.
Check whether the error message contains the `ErrUnknownNode` text instead.
This commit is contained in:
Tim Gross 2022-03-22 15:40:24 -04:00 committed by GitHub
parent f8973d364e
commit 33558cb51e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 2 deletions

3
.changelog/12350.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
csi: Fixed a bug where garbage-collected nodes would block releasing a volume
```

View File

@ -1,9 +1,9 @@
package nomad
import (
- "errors"
"fmt"
"net/http"
+ "strings"
"time"
metrics "github.com/armon/go-metrics"
@ -741,7 +741,9 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc
// we should only get this error if the Nomad node disconnects and
// is garbage-collected, so at this point we don't have any reason
// to operate as though the volume is attached to it.
- if !errors.Is(err, structs.ErrUnknownNode) {
+ // note: errors.Is cannot be used because the RPC call breaks
+ // error wrapping.
+ if !strings.Contains(err.Error(), structs.ErrUnknownNode.Error()) {
return fmt.Errorf("could not detach from node: %w", err)
}
}