open-nomad/client/allocrunner/csi_hook_test.go
Tim Gross ad7355e58b
CSI: persist previous mounts on client to restore during restart (#17840)
When claiming a CSI volume, we need to ensure the CSI node plugin is running
before we send any CSI RPCs. This extends even to the controller publish RPC
because it requires the storage provider's "external node ID" for the
client. This primarily impacts client restarts but is also a problem if the node
plugin exits (and fingerprints) while the allocation that needs a CSI volume
claim is being placed.

Unfortunately there's no mapping from volume to plugin ID available in the
jobspec, so we don't have enough information to wait on plugins until we either
get the volume from the server or retrieve the plugin ID from data we've
persisted on the client.

If we always require getting the volume from the server before making the claim,
a client restart while disconnected will cause all the allocations that need CSI
volumes to fail. Even while connected, checking in with the server to verify the
volume's plugin before trying to make a claim RPC is inherently racy, so we'll
leave that case as-is: the claim will fail if the node plugin needed to support a
newly placed allocation is flapping such that the node fingerprint is changing.

This changeset persists a minimum subset of data about the volume and its plugin
in the client state DB, and retrieves that data during the CSI hook's prerun to
avoid re-claiming and remounting the volume unnecessarily.
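
As a rough sketch (field names are taken from the stubs these tests construct;
the values are illustrative, not the full client state schema), the persisted
per-volume record looks like:

    stub := &state.CSIVolumeStub{
        VolumeID:       "testvolume0", // volume the allocation claimed
        PluginID:       "vol0-plugin", // node plugin to wait for after a restart
        ExternalNodeID: "i-example",   // storage provider's ID for this client
        MountInfo: &csimanager.MountInfo{
            Source: "test-alloc-dir/<alloc-id>/testvolume0/...", // previous mount path (illustrative)
        },
    }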

This changeset also updates the RPC handler to use the external node ID from the
claim whenever it is available.

Fixes: #13028
2023-07-10 13:20:15 -04:00


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package allocrunner
import (
"context"
"errors"
"fmt"
"path/filepath"
"sync"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/allocrunner/state"
"github.com/hashicorp/nomad/client/pluginmanager"
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
"golang.org/x/exp/maps"
)
var _ interfaces.RunnerPrerunHook = (*csiHook)(nil)
var _ interfaces.RunnerPostrunHook = (*csiHook)(nil)
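// TestCSIHook exercises the CSI hook's Prerun/Postrun lifecycle across the
// claim, mount, unmount, and unpublish paths, including mounts restored from
// client state and retryable claim/unmount failures.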
func TestCSIHook(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
testMountSrc := fmt.Sprintf(
"test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)
logger := testlog.HCLogger(t)
testcases := []struct {
name string
volumeRequests map[string]*structs.VolumeRequest
startsUnschedulable bool
startsWithClaims bool
startsWithStubs map[string]*state.CSIVolumeStub
startsWithValidMounts bool
failsFirstUnmount bool
expectedClaimErr error
expectedMounts map[string]*csimanager.MountInfo
expectedCalls map[string]int
}{
{
name: "simple case",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{
"claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
},
{
name: "per-alloc case",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: true,
},
},
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{
"claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
},
{
name: "fatal error on claim",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
startsUnschedulable: true,
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{"claim": 1},
expectedClaimErr: errors.New(
"claiming volumes: could not claim volume testvolume0: volume is currently unschedulable"),
},
{
name: "retryable error on claim",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
startsWithClaims: true,
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{
"claim": 2, "mount": 1, "unmount": 1, "unpublish": 1},
},
{
name: "already mounted",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
startsWithStubs: map[string]*state.CSIVolumeStub{"vol0": {
VolumeID: "vol0",
PluginID: "vol0-plugin",
ExternalNodeID: "i-example",
MountInfo: &csimanager.MountInfo{Source: testMountSrc},
}},
startsWithValidMounts: true,
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{"hasMount": 1, "unmount": 1, "unpublish": 1},
},
{
name: "existing but invalid mounts",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
startsWithStubs: map[string]*state.CSIVolumeStub{"vol0": {
VolumeID: "testvolume0",
PluginID: "vol0-plugin",
ExternalNodeID: "i-example",
MountInfo: &csimanager.MountInfo{Source: testMountSrc},
}},
startsWithValidMounts: false,
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{
"hasMount": 1, "claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
},
{
name: "retry on failed unmount",
volumeRequests: map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
},
failsFirstUnmount: true,
expectedMounts: map[string]*csimanager.MountInfo{
"vol0": &csimanager.MountInfo{Source: testMountSrc},
},
expectedCalls: map[string]int{
"claim": 1, "mount": 1, "unmount": 2, "unpublish": 2},
},
{
name: "should not run",
volumeRequests: map[string]*structs.VolumeRequest{},
},
}
for i := range testcases {
tc := testcases[i]
t.Run(tc.name, func(t *testing.T) {
alloc.Job.TaskGroups[0].Volumes = tc.volumeRequests
callCounts := &callCounter{counts: map[string]int{}}
mgr := mockPluginManager{mounter: mockVolumeMounter{
hasMounts: tc.startsWithValidMounts,
callCounts: callCounts,
failsFirstUnmount: pointer.Of(tc.failsFirstUnmount),
}}
rpcer := mockRPCer{
alloc: alloc,
callCounts: callCounts,
hasExistingClaim: pointer.Of(tc.startsWithClaims),
schedulable: pointer.Of(!tc.startsUnschedulable),
}
ar := mockAllocRunner{
res: &cstructs.AllocHookResources{},
caps: &drivers.Capabilities{
FSIsolation: drivers.FSIsolationChroot,
MountConfigs: drivers.MountConfigSupportAll,
},
stubs: tc.startsWithStubs,
}
hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar.res, "secret")
hook.minBackoffInterval = 1 * time.Millisecond
hook.maxBackoffInterval = 10 * time.Millisecond
hook.maxBackoffDuration = 500 * time.Millisecond
must.NotNil(t, hook)
if tc.expectedClaimErr != nil {
must.EqError(t, hook.Prerun(), tc.expectedClaimErr.Error())
mounts := ar.res.GetCSIMounts()
must.Nil(t, mounts)
} else {
must.NoError(t, hook.Prerun())
mounts := ar.res.GetCSIMounts()
must.MapEq(t, tc.expectedMounts, mounts,
must.Sprintf("got mounts: %v", mounts))
must.NoError(t, hook.Postrun())
}
if tc.failsFirstUnmount {
// retrying the unmount doesn't block Postrun, so give it time
// to run once more before checking the call counts to ensure
// this doesn't flake between 1 and 2 unmount/unpublish calls
time.Sleep(100 * time.Millisecond)
}
counts := callCounts.get()
must.MapEq(t, tc.expectedCalls, counts,
must.Sprintf("got calls: %v", counts))
})
}
}
// TestCSIHook_Prerun_Validation tests that the validation of task capabilities
// in Prerun ensures at least one task supports CSI.
func TestCSIHook_Prerun_Validation(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
logger := testlog.HCLogger(t)
volumeRequests := map[string]*structs.VolumeRequest{
"vol0": {
Name: "vol0",
Type: structs.VolumeTypeCSI,
Source: "testvolume0",
ReadOnly: true,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
MountOptions: &structs.CSIMountOptions{},
PerAlloc: false,
},
}
type testCase struct {
name string
caps *drivers.Capabilities
capFunc func() (*drivers.Capabilities, error)
expectedErr string
}
testcases := []testCase{
{
name: "invalid - driver does not support CSI",
caps: &drivers.Capabilities{
MountConfigs: drivers.MountConfigSupportNone,
},
capFunc: nil,
expectedErr: "no task supports CSI",
},
{
name: "invalid - driver error",
caps: &drivers.Capabilities{},
capFunc: func() (*drivers.Capabilities, error) {
return nil, errors.New("error thrown by driver")
},
expectedErr: "could not validate task driver capabilities: error thrown by driver",
},
{
name: "valid - driver supports CSI",
caps: &drivers.Capabilities{
MountConfigs: drivers.MountConfigSupportAll,
},
capFunc: nil,
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
alloc.Job.TaskGroups[0].Volumes = volumeRequests
callCounts := &callCounter{counts: map[string]int{}}
mgr := mockPluginManager{mounter: mockVolumeMounter{
callCounts: callCounts,
failsFirstUnmount: pointer.Of(false),
}}
rpcer := mockRPCer{
alloc: alloc,
callCounts: callCounts,
hasExistingClaim: pointer.Of(false),
schedulable: pointer.Of(true),
}
ar := mockAllocRunner{
res: &cstructs.AllocHookResources{},
caps: tc.caps,
capFunc: tc.capFunc,
}
hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar.res, "secret")
require.NotNil(t, hook)
if tc.expectedErr != "" {
require.EqualError(t, hook.Prerun(), tc.expectedErr)
mounts := ar.res.GetCSIMounts()
require.Nil(t, mounts)
} else {
require.NoError(t, hook.Prerun())
mounts := ar.res.GetCSIMounts()
require.NotNil(t, mounts)
require.NoError(t, hook.Postrun())
}
})
}
}
// HELPERS AND MOCKS
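// callCounter records how many times each mock operation is invoked, guarded
// by a mutex so the mocks can be called from concurrent goroutines.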
type callCounter struct {
lock sync.Mutex
counts map[string]int
}
func (c *callCounter) inc(name string) {
c.lock.Lock()
defer c.lock.Unlock()
c.counts[name]++
}
func (c *callCounter) get() map[string]int {
c.lock.Lock()
defer c.lock.Unlock()
return maps.Clone(c.counts)
}
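// mockRPCer stands in for the server RPC client. Its pointer fields let a test
// case flip the claim and schedulability state after the first claim attempt.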
type mockRPCer struct {
alloc *structs.Allocation
callCounts *callCounter
hasExistingClaim *bool
schedulable *bool
}
// RPC mocks the server RPCs, acting as though any request succeeds
func (r mockRPCer) RPC(method string, args any, reply any) error {
switch method {
case "CSIVolume.Claim":
r.callCounts.inc("claim")
req := args.(*structs.CSIVolumeClaimRequest)
vol := r.testVolume(req.VolumeID)
err := vol.Claim(req.ToClaim(), r.alloc)
// after the first claim attempt is made, reset the volume's claims as
// though it's been released from another node
*r.hasExistingClaim = false
*r.schedulable = true
if err != nil {
return err
}
resp := reply.(*structs.CSIVolumeClaimResponse)
resp.PublishContext = map[string]string{}
resp.Volume = vol
resp.QueryMeta = structs.QueryMeta{}
case "CSIVolume.Unpublish":
r.callCounts.inc("unpublish")
resp := reply.(*structs.CSIVolumeUnpublishResponse)
resp.QueryMeta = structs.QueryMeta{}
default:
return fmt.Errorf("unexpected method")
}
return nil
}
// testVolume is a helper that optionally starts as unschedulable / claimed, so
// that we can test retryable vs non-retryable failures
func (r mockRPCer) testVolume(id string) *structs.CSIVolume {
vol := structs.NewCSIVolume(id, 0)
vol.Schedulable = *r.schedulable
vol.PluginID = "plugin-" + id
vol.RequestedCapabilities = []*structs.CSIVolumeCapability{
{
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
},
{
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter,
},
}
if *r.hasExistingClaim {
vol.AccessMode = structs.CSIVolumeAccessModeSingleNodeReader
vol.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
vol.ReadClaims["another-alloc-id"] = &structs.CSIVolumeClaim{
AllocationID: "another-alloc-id",
NodeID: "another-node-id",
Mode: structs.CSIVolumeClaimRead,
AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
State: structs.CSIVolumeClaimStateTaken,
}
}
return vol
}
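// mockVolumeMounter implements csimanager.VolumeMounter, counting calls and
// optionally failing the first unmount to exercise the retry path.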
type mockVolumeMounter struct {
hasMounts bool
failsFirstUnmount *bool
callCounts *callCounter
}
func (vm mockVolumeMounter) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usageOpts *csimanager.UsageOptions, publishContext map[string]string) (*csimanager.MountInfo, error) {
vm.callCounts.inc("mount")
return &csimanager.MountInfo{
Source: filepath.Join("test-alloc-dir", alloc.ID, vol.ID, usageOpts.ToFS()),
}, nil
}
func (vm mockVolumeMounter) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usageOpts *csimanager.UsageOptions) error {
vm.callCounts.inc("unmount")
if *vm.failsFirstUnmount {
*vm.failsFirstUnmount = false
return fmt.Errorf("could not unmount")
}
return nil
}
func (vm mockVolumeMounter) HasMount(_ context.Context, mountInfo *csimanager.MountInfo) (bool, error) {
vm.callCounts.inc("hasMount")
return mountInfo != nil && vm.hasMounts, nil
}
func (vm mockVolumeMounter) ExternalID() string {
return "i-example"
}
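// mockPluginManager hands back the mock mounter for any plugin ID and reports
// plugins as immediately available.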
type mockPluginManager struct {
mounter mockVolumeMounter
}
func (mgr mockPluginManager) WaitForPlugin(ctx context.Context, pluginType, pluginID string) error {
return nil
}
func (mgr mockPluginManager) MounterForPlugin(ctx context.Context, pluginID string) (csimanager.VolumeMounter, error) {
return mgr.mounter, nil
}
// no-op methods to fulfill the interface
func (mgr mockPluginManager) PluginManager() pluginmanager.PluginManager { return nil }
func (mgr mockPluginManager) Shutdown() {}
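// mockAllocRunner supplies task driver capabilities and previously persisted
// CSI volume stubs to the hook under test.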
type mockAllocRunner struct {
res *cstructs.AllocHookResources
caps *drivers.Capabilities
capFunc func() (*drivers.Capabilities, error)
stubs map[string]*state.CSIVolumeStub
stubFunc func() (map[string]*state.CSIVolumeStub, error)
}
func (ar mockAllocRunner) GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) {
if ar.capFunc != nil {
return ar.capFunc()
}
return ar.caps, nil
}
func (ar mockAllocRunner) SetCSIVolumes(stubs map[string]*state.CSIVolumeStub) error {
ar.stubs = stubs
return nil
}
func (ar mockAllocRunner) GetCSIVolumes() (map[string]*state.CSIVolumeStub, error) {
if ar.stubFunc != nil {
return ar.stubFunc()
}
return ar.stubs, nil
}