ad7355e58b
When claiming a CSI volume, we need to ensure the CSI node plugin is running before we send any CSI RPCs. This extends even to the controller publish RPC, because it requires the storage provider's "external node ID" for the client. This primarily impacts client restarts, but it is also a problem if the node plugin exits (and fingerprints) while the allocation that needs a CSI volume claim is being placed.

Unfortunately there's no mapping of volume to plugin ID available in the jobspec, so we don't have enough information to wait on plugins until we either get the volume from the server or retrieve the plugin ID from data we've persisted on the client.

If we always require getting the volume from the server before making the claim, a client restart for disconnected clients will cause all the allocations that need CSI volumes to fail. Even while connected, checking in with the server to verify the volume's plugin before trying to make a claim RPC is inherently racy, so we leave that case as-is: the claim will fail if the node plugin needed to support a newly placed allocation is flapping such that the node fingerprint is changing.

This changeset persists a minimal subset of data about the volume and its plugin in the client state DB, and retrieves that data during the CSI hook's prerun to avoid re-claiming and remounting the volume unnecessarily. It also updates the RPC handler to use the external node ID from the claim whenever it is available.

Fixes: #13028
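For reference, the persisted per-volume record has roughly the shape exercised by the test fixtures below. This is a sketch only, assuming the csimanager.MountInfo type; the authoritative definition lives in client/allocrunner/state and may carry additional fields.

// CSIVolumeStub (sketch): the minimal data the CSI hook stores in the client
// state DB so it can wait on the right node plugin and skip re-claiming and
// remounting after a client restart.
type CSIVolumeStub struct {
	VolumeID       string                // ID of the claimed volume
	PluginID       string                // node plugin to wait for before sending CSI RPCs
	ExternalNodeID string                // storage provider's ID for the client node
	MountInfo      *csimanager.MountInfo // where the volume was previously mounted
}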
548 lines
16 KiB
Go
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package allocrunner

import (
	"context"
	"errors"
	"fmt"
	"path/filepath"
	"sync"
	"testing"
	"time"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/allocrunner/state"
	"github.com/hashicorp/nomad/client/pluginmanager"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/maps"
)

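// compile-time assertions that the CSI hook implements the prerun and postrun
// alloc runner hook interfaces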
var _ interfaces.RunnerPrerunHook = (*csiHook)(nil)
var _ interfaces.RunnerPostrunHook = (*csiHook)(nil)

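// TestCSIHook exercises the CSI hook's Prerun/Postrun lifecycle against mocked
// RPCs, plugin manager, and volume mounter, covering claim retries,
// previously-persisted mounts, and unmount retries.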
func TestCSIHook(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	testMountSrc := fmt.Sprintf(
		"test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)
	logger := testlog.HCLogger(t)

	testcases := []struct {
		name                  string
		volumeRequests        map[string]*structs.VolumeRequest
		startsUnschedulable   bool
		startsWithClaims      bool
		startsWithStubs       map[string]*state.CSIVolumeStub
		startsWithValidMounts bool
		failsFirstUnmount     bool
		expectedClaimErr      error
		expectedMounts        map[string]*csimanager.MountInfo
		expectedCalls         map[string]int
	}{

		{
			name: "simple case",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{
				"claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
		},

		{
			name: "per-alloc case",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       true,
				},
			},
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{
				"claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
		},

		{
			name: "fatal error on claim",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			startsUnschedulable: true,
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{"claim": 1},
			expectedClaimErr: errors.New(
				"claiming volumes: could not claim volume testvolume0: volume is currently unschedulable"),
		},

		{
			name: "retryable error on claim",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			startsWithClaims: true,
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{
				"claim": 2, "mount": 1, "unmount": 1, "unpublish": 1},
		},
		{
			name: "already mounted",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			startsWithStubs: map[string]*state.CSIVolumeStub{"vol0": {
				VolumeID:       "vol0",
				PluginID:       "vol0-plugin",
				ExternalNodeID: "i-example",
				MountInfo:      &csimanager.MountInfo{Source: testMountSrc},
			}},
			startsWithValidMounts: true,
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{"hasMount": 1, "unmount": 1, "unpublish": 1},
		},
		{
			name: "existing but invalid mounts",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			startsWithStubs: map[string]*state.CSIVolumeStub{"vol0": {
				VolumeID:       "testvolume0",
				PluginID:       "vol0-plugin",
				ExternalNodeID: "i-example",
				MountInfo:      &csimanager.MountInfo{Source: testMountSrc},
			}},
			startsWithValidMounts: false,
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{
				"hasMount": 1, "claim": 1, "mount": 1, "unmount": 1, "unpublish": 1},
		},

		{
			name: "retry on failed unmount",
			volumeRequests: map[string]*structs.VolumeRequest{
				"vol0": {
					Name:           "vol0",
					Type:           structs.VolumeTypeCSI,
					Source:         "testvolume0",
					ReadOnly:       true,
					AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
					AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
					MountOptions:   &structs.CSIMountOptions{},
					PerAlloc:       false,
				},
			},
			failsFirstUnmount: true,
			expectedMounts: map[string]*csimanager.MountInfo{
				"vol0": &csimanager.MountInfo{Source: testMountSrc},
			},
			expectedCalls: map[string]int{
				"claim": 1, "mount": 1, "unmount": 2, "unpublish": 2},
		},

		{
			name:           "should not run",
			volumeRequests: map[string]*structs.VolumeRequest{},
		},
	}

	for i := range testcases {
		tc := testcases[i]
		t.Run(tc.name, func(t *testing.T) {

			alloc.Job.TaskGroups[0].Volumes = tc.volumeRequests

			callCounts := &callCounter{counts: map[string]int{}}
			mgr := mockPluginManager{mounter: mockVolumeMounter{
				hasMounts:         tc.startsWithValidMounts,
				callCounts:        callCounts,
				failsFirstUnmount: pointer.Of(tc.failsFirstUnmount),
			}}
			rpcer := mockRPCer{
				alloc:            alloc,
				callCounts:       callCounts,
				hasExistingClaim: pointer.Of(tc.startsWithClaims),
				schedulable:      pointer.Of(!tc.startsUnschedulable),
			}
			ar := mockAllocRunner{
				res: &cstructs.AllocHookResources{},
				caps: &drivers.Capabilities{
					FSIsolation:  drivers.FSIsolationChroot,
					MountConfigs: drivers.MountConfigSupportAll,
				},
				stubs: tc.startsWithStubs,
			}

			hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar.res, "secret")
			hook.minBackoffInterval = 1 * time.Millisecond
			hook.maxBackoffInterval = 10 * time.Millisecond
			hook.maxBackoffDuration = 500 * time.Millisecond

			must.NotNil(t, hook)

			if tc.expectedClaimErr != nil {
				must.EqError(t, hook.Prerun(), tc.expectedClaimErr.Error())
				mounts := ar.res.GetCSIMounts()
				must.Nil(t, mounts)
			} else {
				must.NoError(t, hook.Prerun())
				mounts := ar.res.GetCSIMounts()
				must.MapEq(t, tc.expectedMounts, mounts,
					must.Sprintf("got mounts: %v", mounts))
				must.NoError(t, hook.Postrun())
			}

			if tc.failsFirstUnmount {
				// retrying the unmount doesn't block Postrun, so give it time
				// to run once more before checking the call counts to ensure
				// this doesn't flake between 1 and 2 unmount/unpublish calls
				time.Sleep(100 * time.Millisecond)
			}

			counts := callCounts.get()
			must.MapEq(t, tc.expectedCalls, counts,
				must.Sprintf("got calls: %v", counts))

		})
	}

}

// TestCSIHook_Prerun_Validation tests that the validation of task capabilities
// in Prerun ensures at least one task supports CSI.
func TestCSIHook_Prerun_Validation(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	logger := testlog.HCLogger(t)
	volumeRequests := map[string]*structs.VolumeRequest{
		"vol0": {
			Name:           "vol0",
			Type:           structs.VolumeTypeCSI,
			Source:         "testvolume0",
			ReadOnly:       true,
			AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
			AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
			MountOptions:   &structs.CSIMountOptions{},
			PerAlloc:       false,
		},
	}

	type testCase struct {
		name        string
		caps        *drivers.Capabilities
		capFunc     func() (*drivers.Capabilities, error)
		expectedErr string
	}

	testcases := []testCase{
		{
			name: "invalid - driver does not support CSI",
			caps: &drivers.Capabilities{
				MountConfigs: drivers.MountConfigSupportNone,
			},
			capFunc:     nil,
			expectedErr: "no task supports CSI",
		},

		{
			name: "invalid - driver error",
			caps: &drivers.Capabilities{},
			capFunc: func() (*drivers.Capabilities, error) {
				return nil, errors.New("error thrown by driver")
			},
			expectedErr: "could not validate task driver capabilities: error thrown by driver",
		},

		{
			name: "valid - driver supports CSI",
			caps: &drivers.Capabilities{
				MountConfigs: drivers.MountConfigSupportAll,
			},
			capFunc: nil,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			alloc.Job.TaskGroups[0].Volumes = volumeRequests

			callCounts := &callCounter{counts: map[string]int{}}
			mgr := mockPluginManager{mounter: mockVolumeMounter{
				callCounts:        callCounts,
				failsFirstUnmount: pointer.Of(false),
			}}
			rpcer := mockRPCer{
				alloc:            alloc,
				callCounts:       callCounts,
				hasExistingClaim: pointer.Of(false),
				schedulable:      pointer.Of(true),
			}

			ar := mockAllocRunner{
				res:     &cstructs.AllocHookResources{},
				caps:    tc.caps,
				capFunc: tc.capFunc,
			}

			hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar.res, "secret")
			require.NotNil(t, hook)

			if tc.expectedErr != "" {
				require.EqualError(t, hook.Prerun(), tc.expectedErr)
				mounts := ar.res.GetCSIMounts()
				require.Nil(t, mounts)
			} else {
				require.NoError(t, hook.Prerun())
				mounts := ar.res.GetCSIMounts()
				require.NotNil(t, mounts)
				require.NoError(t, hook.Postrun())
			}
		})
	}
}

// HELPERS AND MOCKS

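// callCounter is a thread-safe counter of mock method calls by name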
type callCounter struct {
	lock   sync.Mutex
	counts map[string]int
}

func (c *callCounter) inc(name string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.counts[name]++
}

func (c *callCounter) get() map[string]int {
	c.lock.Lock()
	defer c.lock.Unlock()
	return maps.Clone(c.counts)
}

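// mockRPCer stands in for the client's RPC connection to the servers; its
// pointer fields let the mock flip claim and schedulability state between calls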
type mockRPCer struct {
	alloc            *structs.Allocation
	callCounts       *callCounter
	hasExistingClaim *bool
	schedulable      *bool
}

// RPC mocks the server RPCs, acting as though any request succeeds
func (r mockRPCer) RPC(method string, args any, reply any) error {
	switch method {
	case "CSIVolume.Claim":
		r.callCounts.inc("claim")
		req := args.(*structs.CSIVolumeClaimRequest)
		vol := r.testVolume(req.VolumeID)
		err := vol.Claim(req.ToClaim(), r.alloc)

		// after the first claim attempt is made, reset the volume's claims as
		// though it's been released from another node
		*r.hasExistingClaim = false
		*r.schedulable = true

		if err != nil {
			return err
		}

		resp := reply.(*structs.CSIVolumeClaimResponse)
		resp.PublishContext = map[string]string{}
		resp.Volume = vol
		resp.QueryMeta = structs.QueryMeta{}

	case "CSIVolume.Unpublish":
		r.callCounts.inc("unpublish")
		resp := reply.(*structs.CSIVolumeUnpublishResponse)
		resp.QueryMeta = structs.QueryMeta{}

	default:
		return fmt.Errorf("unexpected method")
	}
	return nil
}

// testVolume is a helper that optionally starts as unschedulable / claimed, so
// that we can test retryable vs non-retryable failures
func (r mockRPCer) testVolume(id string) *structs.CSIVolume {
	vol := structs.NewCSIVolume(id, 0)
	vol.Schedulable = *r.schedulable
	vol.PluginID = "plugin-" + id
	vol.RequestedCapabilities = []*structs.CSIVolumeCapability{
		{
			AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
			AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
		},
		{
			AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
			AccessMode:     structs.CSIVolumeAccessModeSingleNodeWriter,
		},
	}

	if *r.hasExistingClaim {
		vol.AccessMode = structs.CSIVolumeAccessModeSingleNodeReader
		vol.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
		vol.ReadClaims["another-alloc-id"] = &structs.CSIVolumeClaim{
			AllocationID:   "another-alloc-id",
			NodeID:         "another-node-id",
			Mode:           structs.CSIVolumeClaimRead,
			AccessMode:     structs.CSIVolumeAccessModeSingleNodeReader,
			AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
			State:          structs.CSIVolumeClaimStateTaken,
		}
	}

	return vol
}

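// mockVolumeMounter mocks the volume mounter interface, recording
// mount/unmount/hasMount calls and optionally failing the first unmount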
type mockVolumeMounter struct {
	hasMounts         bool
	failsFirstUnmount *bool
	callCounts        *callCounter
}

func (vm mockVolumeMounter) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usageOpts *csimanager.UsageOptions, publishContext map[string]string) (*csimanager.MountInfo, error) {
	vm.callCounts.inc("mount")
	return &csimanager.MountInfo{
		Source: filepath.Join("test-alloc-dir", alloc.ID, vol.ID, usageOpts.ToFS()),
	}, nil
}

func (vm mockVolumeMounter) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usageOpts *csimanager.UsageOptions) error {
	vm.callCounts.inc("unmount")

	if *vm.failsFirstUnmount {
		*vm.failsFirstUnmount = false
		return fmt.Errorf("could not unmount")
	}

	return nil
}

func (vm mockVolumeMounter) HasMount(_ context.Context, mountInfo *csimanager.MountInfo) (bool, error) {
	vm.callCounts.inc("hasMount")
	return mountInfo != nil && vm.hasMounts, nil
}

func (vm mockVolumeMounter) ExternalID() string {
	return "i-example"
}

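// mockPluginManager mocks the CSI plugin manager so the hook never has to wait
// on a real node plugin and always gets the mock mounter back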
type mockPluginManager struct {
	mounter mockVolumeMounter
}

func (mgr mockPluginManager) WaitForPlugin(ctx context.Context, pluginType, pluginID string) error {
	return nil
}

func (mgr mockPluginManager) MounterForPlugin(ctx context.Context, pluginID string) (csimanager.VolumeMounter, error) {
	return mgr.mounter, nil
}

// no-op methods to fulfill the interface
func (mgr mockPluginManager) PluginManager() pluginmanager.PluginManager { return nil }
func (mgr mockPluginManager) Shutdown()                                  {}

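// mockAllocRunner mocks the alloc runner's view of the client: task driver
// capabilities plus the CSI volume stubs persisted in the client state DB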
type mockAllocRunner struct {
	res     *cstructs.AllocHookResources
	caps    *drivers.Capabilities
	capFunc func() (*drivers.Capabilities, error)

	stubs    map[string]*state.CSIVolumeStub
	stubFunc func() (map[string]*state.CSIVolumeStub, error)
}

func (ar mockAllocRunner) GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) {
	if ar.capFunc != nil {
		return ar.capFunc()
	}
	return ar.caps, nil
}

func (ar mockAllocRunner) SetCSIVolumes(stubs map[string]*state.CSIVolumeStub) error {
	ar.stubs = stubs
	return nil
}

func (ar mockAllocRunner) GetCSIVolumes() (map[string]*state.CSIVolumeStub, error) {
	if ar.stubFunc != nil {
		return ar.stubFunc()
	}
	return ar.stubs, nil
}