CSI: move node unmount to server-driven RPCs (#7596)
If a volume-claiming alloc stops and the CSI Node plugin that serves that alloc's volumes is missing, there's no way for the allocrunner hook to send the `NodeUnpublish` and `NodeUnstage` RPCs. This changeset addresses the issue with a redesign of the client-side CSI code. Rather than unmounting in the alloc runner hook, the alloc runner hook simply exits. When the server gets the `Node.UpdateAlloc` for the terminal allocation that had a volume claim, it creates a volume claim GC job. This job makes client RPCs to a new node plugin RPC endpoint, and only once that succeeds does it move on to making the client RPCs to the controller plugin. If the node plugin is unavailable, the GC job will fail and be requeued.
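To summarize the server-side flow before the diff: for each terminal claim, the volume claim GC performs a node detach, then (if needed) a controller detach, and only then releases the claim. The sketch below condenses volumeClaimReapImpl from this change; it is a reading aid rather than the verbatim implementation. The function name reapClaim is invented here, and error accumulation, the per-node claim counting, the controller-node lookup (nodeForControllerPlugin), and the WriteRequest region/namespace/ACL fields are elided.

// Condensed sketch of the per-claim reap step; not the verbatim implementation.
func reapClaim(srv RPCServer, plug *structs.CSIPlugin, vol *structs.CSIVolume, claim gcClaimRequest) error {
	// (1) NodeUnpublish/NodeUnstage via the new client node endpoint. If the
	// node plugin is unavailable this RPC fails, the GC eval fails, and the
	// work is requeued.
	nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
		PluginID: plug.ID,
		VolumeID: vol.RemoteID(),
		AllocID:  claim.allocID,
		NodeID:   claim.nodeID,
	}
	if err := srv.RPC("ClientCSI.NodeDetachVolume", nReq,
		&cstructs.ClientCSINodeDetachVolumeResponse{}); err != nil {
		return err
	}

	// (2) ControllerUnpublish, only for plugins that require a controller
	// (and, in the real code, only when no other claims remain on that node).
	if vol.ControllerRequired {
		cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
			VolumeID:        vol.RemoteID(),
			ClientCSINodeID: claim.nodeID,
		}
		if err := srv.RPC("ClientCSI.ControllerDetachVolume", cReq,
			&cstructs.ClientCSIControllerDetachVolumeResponse{}); err != nil {
			return err
		}
	}

	// (3) Release the claim in the state store so the volume can be
	// scheduled again.
	req := &structs.CSIVolumeClaimRequest{
		VolumeID:     vol.ID,
		AllocationID: claim.allocID,
		Claim:        structs.CSIVolumeClaimRelease,
	}
	return srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
}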
parent 3bc13a715e
commit f6b3d38eb8
@@ -5,7 +5,6 @@ import (
"fmt"

hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/nomad/structs"
)

@@ -45,7 +44,7 @@ func (c *csiHook) Prerun() error {
mounts := make(map[string]*csimanager.MountInfo, len(volumes))
for alias, pair := range volumes {
mounter, err := c.csimanager.MounterForVolume(ctx, pair.volume)
mounter, err := c.csimanager.MounterForPlugin(ctx, pair.volume.PluginID)
if err != nil {
return err
}

@@ -72,53 +71,6 @@ func (c *csiHook) Prerun() error {
return nil
}

func (c *csiHook) Postrun() error {
if !c.shouldRun() {
return nil
}

// TODO(tgross): the contexts for the CSI RPC calls made during
// mounting can have very long timeouts. Until these are better
// tuned, there's not a good value to put here for a WithCancel
// without risking conflicts with the grpc retries/timeouts in the
// pluginmanager package.
ctx := context.TODO()

volumes, err := c.csiVolumesFromAlloc()
if err != nil {
return err
}

// For Postrun, we accumulate all unmount errors, rather than stopping on the
// first failure. This is because we want to make a best effort to free all
// storage, and in some cases there may be incorrect errors from volumes that
// never mounted correctly during prerun when an alloc is failed. It may also
// fail because a volume was externally deleted while in use by this alloc.
var result *multierror.Error

for _, pair := range volumes {
mounter, err := c.csimanager.MounterForVolume(ctx, pair.volume)
if err != nil {
result = multierror.Append(result, err)
continue
}

usageOpts := &csimanager.UsageOptions{
ReadOnly: pair.request.ReadOnly,
AttachmentMode: string(pair.volume.AttachmentMode),
AccessMode: string(pair.volume.AccessMode),
}

err = mounter.UnmountVolume(ctx, pair.volume, c.alloc, usageOpts)
if err != nil {
result = multierror.Append(result, err)
continue
}
}

return result.ErrorOrNil()
}

type volumeAndRequest struct {
volume *structs.CSIVolume
request *structs.VolumeRequest

@@ -172,42 +124,6 @@ func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error)
return result, nil
}

// csiVolumesFromAlloc finds all the CSI Volume requests from the allocation's
// task group and then fetches them from the Nomad Server, before returning
// them in the form of map[RequestedAlias]*volumeAndReqest. This allows us to
// thread the request context through to determine usage options for each volume.
//
// If any volume fails to validate then we return an error.
func (c *csiHook) csiVolumesFromAlloc() (map[string]*volumeAndRequest, error) {
vols := make(map[string]*volumeAndRequest)
tg := c.alloc.Job.LookupTaskGroup(c.alloc.TaskGroup)
for alias, vol := range tg.Volumes {
if vol.Type == structs.VolumeTypeCSI {
vols[alias] = &volumeAndRequest{request: vol}
}
}

for alias, pair := range vols {
req := &structs.CSIVolumeGetRequest{
ID: pair.request.Source,
}
req.Region = c.alloc.Job.Region

var resp structs.CSIVolumeGetResponse
if err := c.rpcClient.RPC("CSIVolume.Get", req, &resp); err != nil {
return nil, err
}

if resp.Volume == nil {
return nil, fmt.Errorf("Unexpected nil volume returned for ID: %v", pair.request.Source)
}

vols[alias].volume = resp.Volume
}

return vols, nil
}

func newCSIHook(logger hclog.Logger, alloc *structs.Allocation, rpcClient RPCer, csi csimanager.Manager, updater hookResourceSetter) *csiHook {
return &csiHook{
alloc: alloc,
@ -8,13 +8,14 @@ import (
|
|||
metrics "github.com/armon/go-metrics"
|
||||
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
|
||||
"github.com/hashicorp/nomad/client/dynamicplugins"
|
||||
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
|
||||
"github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/plugins/csi"
|
||||
)
|
||||
|
||||
// CSIController endpoint is used for interacting with CSI plugins on a client.
|
||||
// CSI endpoint is used for interacting with CSI plugins on a client.
|
||||
// TODO: Submit metrics with labels to allow debugging per plugin perf problems.
|
||||
type CSIController struct {
|
||||
type CSI struct {
|
||||
c *Client
|
||||
}
|
||||
|
||||
|
@ -29,10 +30,10 @@ var (
|
|||
ErrPluginTypeError = errors.New("CSI Plugin loaded incorrectly")
|
||||
)
|
||||
|
||||
// ValidateVolume is used during volume registration to validate
|
||||
// ControllerValidateVolume is used during volume registration to validate
|
||||
// that a volume exists and that the capabilities it was registered with are
|
||||
// supported by the CSI Plugin and external volume configuration.
|
||||
func (c *CSIController) ValidateVolume(req *structs.ClientCSIControllerValidateVolumeRequest, resp *structs.ClientCSIControllerValidateVolumeResponse) error {
|
||||
func (c *CSI) ControllerValidateVolume(req *structs.ClientCSIControllerValidateVolumeRequest, resp *structs.ClientCSIControllerValidateVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"client", "csi_controller", "validate_volume"}, time.Now())
|
||||
|
||||
if req.VolumeID == "" {
|
||||
|
@ -65,7 +66,7 @@ func (c *CSIController) ValidateVolume(req *structs.ClientCSIControllerValidateV
|
|||
grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
|
||||
}
|
||||
|
||||
// AttachVolume is used to attach a volume from a CSI Cluster to
|
||||
// ControllerAttachVolume is used to attach a volume from a CSI Cluster to
|
||||
// the storage node provided in the request.
|
||||
//
|
||||
// The controller attachment flow currently works as follows:
|
||||
|
@ -73,7 +74,7 @@ func (c *CSIController) ValidateVolume(req *structs.ClientCSIControllerValidateV
|
|||
// 2. Call ControllerPublishVolume on the CSI Plugin to trigger a remote attachment
|
||||
//
|
||||
// In the future this may be expanded to request dynamic secrets for attachment.
|
||||
func (c *CSIController) AttachVolume(req *structs.ClientCSIControllerAttachVolumeRequest, resp *structs.ClientCSIControllerAttachVolumeResponse) error {
|
||||
func (c *CSI) ControllerAttachVolume(req *structs.ClientCSIControllerAttachVolumeRequest, resp *structs.ClientCSIControllerAttachVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"client", "csi_controller", "publish_volume"}, time.Now())
|
||||
plugin, err := c.findControllerPlugin(req.PluginID)
|
||||
if err != nil {
|
||||
|
@ -116,9 +117,9 @@ func (c *CSIController) AttachVolume(req *structs.ClientCSIControllerAttachVolum
|
|||
return nil
|
||||
}
|
||||
|
||||
// DetachVolume is used to detach a volume from a CSI Cluster from
|
||||
// ControllerDetachVolume is used to detach a volume from a CSI Cluster from
|
||||
// the storage node provided in the request.
|
||||
func (c *CSIController) DetachVolume(req *structs.ClientCSIControllerDetachVolumeRequest, resp *structs.ClientCSIControllerDetachVolumeResponse) error {
|
||||
func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolumeRequest, resp *structs.ClientCSIControllerDetachVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"client", "csi_controller", "unpublish_volume"}, time.Now())
|
||||
plugin, err := c.findControllerPlugin(req.PluginID)
|
||||
if err != nil {
|
||||
|
@@ -157,12 +158,50 @@ func (c *CSIController) DetachVolum
return nil
}

func (c *CSIController) findControllerPlugin(name string) (csi.CSIPlugin, error) {
// NodeDetachVolume is used to detach a volume from a CSI Cluster from
// the storage node provided in the request.
func (c *CSI) NodeDetachVolume(req *structs.ClientCSINodeDetachVolumeRequest, resp *structs.ClientCSINodeDetachVolumeResponse) error {
defer metrics.MeasureSince([]string{"client", "csi_node", "detach_volume"}, time.Now())

// The following block of validation checks should not be reached on a
// real Nomad cluster. They serve as a defensive check before forwarding
// requests to plugins, and to aid with development.
if req.PluginID == "" {
return errors.New("PluginID is required")
}
if req.VolumeID == "" {
return errors.New("VolumeID is required")
}
if req.AllocID == "" {
return errors.New("AllocID is required")
}

ctx, cancelFn := c.requestContext()
defer cancelFn()

mounter, err := c.c.csimanager.MounterForPlugin(ctx, req.PluginID)
if err != nil {
return err
}

usageOpts := &csimanager.UsageOptions{
ReadOnly: req.ReadOnly,
AttachmentMode: string(req.AttachmentMode),
AccessMode: string(req.AccessMode),
}

err = mounter.UnmountVolume(ctx, req.VolumeID, req.AllocID, usageOpts)
if err != nil {
return err
}
return nil
}

func (c *CSI) findControllerPlugin(name string) (csi.CSIPlugin, error) {
return c.findPlugin(dynamicplugins.PluginTypeCSIController, name)
}

// TODO: Cache Plugin Clients?
func (c *CSIController) findPlugin(ptype, name string) (csi.CSIPlugin, error) {
func (c *CSI) findPlugin(ptype, name string) (csi.CSIPlugin, error) {
pIface, err := c.c.dynamicRegistry.DispensePlugin(ptype, name)
if err != nil {
return nil, err

@@ -176,6 +215,6 @@ func (c *CSIController) findPlugin(ptype, name string) (csi.CSIPlugin, error) {
return plugin, nil
}

func (c *CSIController) requestContext() (context.Context, context.CancelFunc) {
func (c *CSI) requestContext() (context.Context, context.CancelFunc) {
return context.WithTimeout(context.Background(), CSIPluginRequestTimeout)
}
@ -18,6 +18,12 @@ var fakePlugin = &dynamicplugins.PluginInfo{
|
|||
ConnectionInfo: &dynamicplugins.PluginConnectionInfo{},
|
||||
}
|
||||
|
||||
var fakeNodePlugin = &dynamicplugins.PluginInfo{
|
||||
Name: "test-plugin",
|
||||
Type: "csi-node",
|
||||
ConnectionInfo: &dynamicplugins.PluginConnectionInfo{},
|
||||
}
|
||||
|
||||
func TestCSIController_AttachVolume(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
@ -156,7 +162,7 @@ func TestCSIController_AttachVolume(t *testing.T) {
|
|||
require.Nil(err)
|
||||
|
||||
var resp structs.ClientCSIControllerAttachVolumeResponse
|
||||
err = client.ClientRPC("CSIController.AttachVolume", tc.Request, &resp)
|
||||
err = client.ClientRPC("CSI.ControllerAttachVolume", tc.Request, &resp)
|
||||
require.Equal(tc.ExpectedErr, err)
|
||||
if tc.ExpectedResponse != nil {
|
||||
require.Equal(tc.ExpectedResponse, &resp)
|
||||
|
@ -255,7 +261,7 @@ func TestCSIController_ValidateVolume(t *testing.T) {
|
|||
require.Nil(err)
|
||||
|
||||
var resp structs.ClientCSIControllerValidateVolumeResponse
|
||||
err = client.ClientRPC("CSIController.ValidateVolume", tc.Request, &resp)
|
||||
err = client.ClientRPC("CSI.ControllerValidateVolume", tc.Request, &resp)
|
||||
require.Equal(tc.ExpectedErr, err)
|
||||
if tc.ExpectedResponse != nil {
|
||||
require.Equal(tc.ExpectedResponse, &resp)
|
||||
|
@ -338,7 +344,88 @@ func TestCSIController_DetachVolume(t *testing.T) {
|
|||
require.Nil(err)
|
||||
|
||||
var resp structs.ClientCSIControllerDetachVolumeResponse
|
||||
err = client.ClientRPC("CSIController.DetachVolume", tc.Request, &resp)
|
||||
err = client.ClientRPC("CSI.ControllerDetachVolume", tc.Request, &resp)
|
||||
require.Equal(tc.ExpectedErr, err)
|
||||
if tc.ExpectedResponse != nil {
|
||||
require.Equal(tc.ExpectedResponse, &resp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCSINode_DetachVolume(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cases := []struct {
|
||||
Name string
|
||||
ClientSetupFunc func(*fake.Client)
|
||||
Request *structs.ClientCSINodeDetachVolumeRequest
|
||||
ExpectedErr error
|
||||
ExpectedResponse *structs.ClientCSINodeDetachVolumeResponse
|
||||
}{
|
||||
{
|
||||
Name: "returns plugin not found errors",
|
||||
Request: &structs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: "some-garbage",
|
||||
VolumeID: "-",
|
||||
AllocID: "-",
|
||||
NodeID: "-",
|
||||
AttachmentMode: nstructs.CSIVolumeAttachmentModeFilesystem,
|
||||
AccessMode: nstructs.CSIVolumeAccessModeMultiNodeReader,
|
||||
ReadOnly: true,
|
||||
},
|
||||
ExpectedErr: errors.New("plugin some-garbage for type csi-node not found"),
|
||||
},
|
||||
{
|
||||
Name: "validates volumeid is not empty",
|
||||
Request: &structs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: fakeNodePlugin.Name,
|
||||
},
|
||||
ExpectedErr: errors.New("VolumeID is required"),
|
||||
},
|
||||
{
|
||||
Name: "validates nodeid is not empty",
|
||||
Request: &structs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: fakeNodePlugin.Name,
|
||||
VolumeID: "1234-4321-1234-4321",
|
||||
},
|
||||
ExpectedErr: errors.New("AllocID is required"),
|
||||
},
|
||||
{
|
||||
Name: "returns transitive errors",
|
||||
ClientSetupFunc: func(fc *fake.Client) {
|
||||
fc.NextNodeUnpublishVolumeErr = errors.New("wont-see-this")
|
||||
},
|
||||
Request: &structs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: fakeNodePlugin.Name,
|
||||
VolumeID: "1234-4321-1234-4321",
|
||||
AllocID: "4321-1234-4321-1234",
|
||||
},
|
||||
// we don't have a csimanager in this context
|
||||
ExpectedErr: errors.New("plugin test-plugin for type csi-node not found"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
require := require.New(t)
|
||||
client, cleanup := TestClient(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
fakeClient := &fake.Client{}
|
||||
if tc.ClientSetupFunc != nil {
|
||||
tc.ClientSetupFunc(fakeClient)
|
||||
}
|
||||
|
||||
dispenserFunc := func(*dynamicplugins.PluginInfo) (interface{}, error) {
|
||||
return fakeClient, nil
|
||||
}
|
||||
client.dynamicRegistry.StubDispenserForType(dynamicplugins.PluginTypeCSINode, dispenserFunc)
|
||||
err := client.dynamicRegistry.RegisterPlugin(fakeNodePlugin)
|
||||
require.Nil(err)
|
||||
|
||||
var resp structs.ClientCSINodeDetachVolumeResponse
|
||||
err = client.ClientRPC("CSI.NodeDetachVolume", tc.Request, &resp)
|
||||
require.Equal(tc.ExpectedErr, err)
|
||||
if tc.ExpectedResponse != nil {
|
||||
require.Equal(tc.ExpectedResponse, &resp)
@@ -2,17 +2,12 @@ package csimanager

import (
"context"
"errors"
"strings"

"github.com/hashicorp/nomad/client/pluginmanager"
"github.com/hashicorp/nomad/nomad/structs"
)

var (
PluginNotFoundErr = errors.New("Plugin not found")
)

type MountInfo struct {
Source string
IsDevice bool

@@ -47,17 +42,16 @@ func (u *UsageOptions) ToFS() string {

type VolumeMounter interface {
MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usageOpts *UsageOptions, publishContext map[string]string) (*MountInfo, error)
UnmountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usageOpts *UsageOptions) error
UnmountVolume(ctx context.Context, volID, allocID string, usageOpts *UsageOptions) error
}

type Manager interface {
// PluginManager returns a PluginManager for use by the node fingerprinter.
PluginManager() pluginmanager.PluginManager

// MounterForVolume returns a VolumeMounter for the given requested volume.
// If there is no plugin registered for this volume type, a PluginNotFoundErr
// will be returned.
MounterForVolume(ctx context.Context, volume *structs.CSIVolume) (VolumeMounter, error)
// MounterForPlugin returns a VolumeMounter for the plugin ID associated
// with the volume. Returns an error if this plugin isn't registered.
MounterForPlugin(ctx context.Context, pluginID string) (VolumeMounter, error)

// Shutdown shuts down the Manager and unmounts any locally attached volumes.
Shutdown()

@@ -2,6 +2,7 @@ package csimanager

import (
"context"
"fmt"
"sync"
"time"

@@ -73,15 +74,15 @@ func (c *csiManager) PluginManager() pluginmanager.PluginManager {
return c
}

func (c *csiManager) MounterForVolume(ctx context.Context, vol *structs.CSIVolume) (VolumeMounter, error) {
func (c *csiManager) MounterForPlugin(ctx context.Context, pluginID string) (VolumeMounter, error) {
nodePlugins, hasAnyNodePlugins := c.instances["csi-node"]
if !hasAnyNodePlugins {
return nil, PluginNotFoundErr
return nil, fmt.Errorf("no storage node plugins found")
}

mgr, hasPlugin := nodePlugins[vol.PluginID]
mgr, hasPlugin := nodePlugins[pluginID]
if !hasPlugin {
return nil, PluginNotFoundErr
return nil, fmt.Errorf("plugin %s for type csi-node not found", pluginID)
}

return mgr.VolumeMounter(ctx)
@ -2,8 +2,6 @@ package csimanager
|
|||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// volumeUsageTracker tracks the allocations that depend on a given volume
|
||||
|
@ -20,7 +18,7 @@ func newVolumeUsageTracker() *volumeUsageTracker {
|
|||
}
|
||||
|
||||
type volumeUsageKey struct {
|
||||
volume *structs.CSIVolume
|
||||
id string
|
||||
usageOpts UsageOptions
|
||||
}
|
||||
|
||||
|
@ -28,17 +26,17 @@ func (v *volumeUsageTracker) allocsForKey(key volumeUsageKey) []string {
|
|||
return v.state[key]
|
||||
}
|
||||
|
||||
func (v *volumeUsageTracker) appendAlloc(key volumeUsageKey, alloc *structs.Allocation) {
|
||||
func (v *volumeUsageTracker) appendAlloc(key volumeUsageKey, allocID string) {
|
||||
allocs := v.allocsForKey(key)
|
||||
allocs = append(allocs, alloc.ID)
|
||||
allocs = append(allocs, allocID)
|
||||
v.state[key] = allocs
|
||||
}
|
||||
|
||||
func (v *volumeUsageTracker) removeAlloc(key volumeUsageKey, needle *structs.Allocation) {
|
||||
func (v *volumeUsageTracker) removeAlloc(key volumeUsageKey, needle string) {
|
||||
allocs := v.allocsForKey(key)
|
||||
var newAllocs []string
|
||||
for _, allocID := range allocs {
|
||||
if allocID != needle.ID {
|
||||
if allocID != needle {
|
||||
newAllocs = append(newAllocs, allocID)
|
||||
}
|
||||
}
|
||||
|
@ -50,22 +48,22 @@ func (v *volumeUsageTracker) removeAlloc(key volumeUsageKey, needle *structs.All
|
|||
}
|
||||
}
|
||||
|
||||
func (v *volumeUsageTracker) Claim(alloc *structs.Allocation, volume *structs.CSIVolume, usage *UsageOptions) {
|
||||
func (v *volumeUsageTracker) Claim(allocID, volID string, usage *UsageOptions) {
|
||||
v.stateMu.Lock()
|
||||
defer v.stateMu.Unlock()
|
||||
|
||||
key := volumeUsageKey{volume: volume, usageOpts: *usage}
|
||||
v.appendAlloc(key, alloc)
|
||||
key := volumeUsageKey{id: volID, usageOpts: *usage}
|
||||
v.appendAlloc(key, allocID)
|
||||
}
|
||||
|
||||
// Free removes the allocation from the state list for the given alloc. If the
|
||||
// alloc is the last allocation for the volume then it returns true.
|
||||
func (v *volumeUsageTracker) Free(alloc *structs.Allocation, volume *structs.CSIVolume, usage *UsageOptions) bool {
|
||||
func (v *volumeUsageTracker) Free(allocID, volID string, usage *UsageOptions) bool {
|
||||
v.stateMu.Lock()
|
||||
defer v.stateMu.Unlock()
|
||||
|
||||
key := volumeUsageKey{volume: volume, usageOpts: *usage}
|
||||
v.removeAlloc(key, alloc)
|
||||
key := volumeUsageKey{id: volID, usageOpts: *usage}
|
||||
v.removeAlloc(key, allocID)
|
||||
allocs := v.allocsForKey(key)
|
||||
return len(allocs) == 0
|
||||
}
|
||||
|
|
|
@ -47,13 +47,13 @@ func TestUsageTracker(t *testing.T) {
|
|||
ID: "foo",
|
||||
}
|
||||
for _, alloc := range tc.RegisterAllocs {
|
||||
tracker.Claim(alloc, volume, &UsageOptions{})
|
||||
tracker.Claim(alloc.ID, volume.ID, &UsageOptions{})
|
||||
}
|
||||
|
||||
result := false
|
||||
|
||||
for _, alloc := range tc.FreeAllocs {
|
||||
result = tracker.Free(alloc, volume, &UsageOptions{})
|
||||
result = tracker.Free(alloc.ID, volume.ID, &UsageOptions{})
|
||||
}
|
||||
|
||||
require.Equal(t, tc.ExpectedResult, result, "Tracker State: %#v", tracker.state)
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
|
||||
|
@ -62,12 +63,12 @@ func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.
|
|||
}
|
||||
}
|
||||
|
||||
func (v *volumeManager) stagingDirForVolume(root string, vol *structs.CSIVolume, usage *UsageOptions) string {
|
||||
return filepath.Join(root, StagingDirName, vol.ID, usage.ToFS())
|
||||
func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
|
||||
return filepath.Join(root, StagingDirName, volID, usage.ToFS())
|
||||
}
|
||||
|
||||
func (v *volumeManager) allocDirForVolume(root string, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) string {
|
||||
return filepath.Join(root, AllocSpecificDirName, alloc.ID, vol.ID, usage.ToFS())
|
||||
func (v *volumeManager) allocDirForVolume(root string, volID, allocID string, usage *UsageOptions) string {
|
||||
return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
|
||||
}
|
||||
|
||||
// ensureStagingDir attempts to create a directory for use when staging a volume
|
||||
|
@ -77,7 +78,7 @@ func (v *volumeManager) allocDirForVolume(root string, vol *structs.CSIVolume, a
|
|||
// Returns whether the directory is a pre-existing mountpoint, the staging path,
|
||||
// and any errors that occurred.
|
||||
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
|
||||
stagingPath := v.stagingDirForVolume(v.mountRoot, vol, usage)
|
||||
stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)
|
||||
|
||||
// Make the staging path, owned by the Nomad User
|
||||
if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
|
||||
|
@ -102,7 +103,7 @@ func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOpt
|
|||
// Returns whether the directory is a pre-existing mountpoint, the publish path,
|
||||
// and any errors that occurred.
|
||||
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
|
||||
allocPath := v.allocDirForVolume(v.mountRoot, vol, alloc, usage)
|
||||
allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID, usage)
|
||||
|
||||
// Make the alloc path, owned by the Nomad User
|
||||
if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
|
||||
|
@ -148,7 +149,7 @@ func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume,
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol, usage)
|
||||
pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
|
||||
|
||||
logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)
|
||||
|
||||
|
@ -179,14 +180,14 @@ func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolum
|
|||
logger := hclog.FromContext(ctx)
|
||||
var pluginStagingPath string
|
||||
if v.requiresStaging {
|
||||
pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol, usage)
|
||||
pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
|
||||
}
|
||||
|
||||
hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, vol, alloc, usage)
|
||||
pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)
|
||||
|
||||
if isMount {
|
||||
logger.Debug("Re-using existing published volume for allocation")
|
||||
|
@ -235,13 +236,12 @@ func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume,
|
|||
}
|
||||
|
||||
if err == nil {
|
||||
v.usageTracker.Claim(alloc, vol, usage)
|
||||
v.usageTracker.Claim(alloc.ID, vol.ID, usage)
|
||||
}
|
||||
|
||||
event := structs.NewNodeEvent().
|
||||
SetSubsystem(structs.NodeEventSubsystemStorage).
|
||||
SetMessage("Mount volume").
|
||||
AddDetail("volume_namespace", vol.Namespace).
|
||||
AddDetail("volume_id", vol.ID)
|
||||
if err == nil {
|
||||
event.AddDetail("success", "true")
|
||||
|
@ -259,15 +259,15 @@ func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume,
|
|||
// once for each staging path that a volume has been staged under.
|
||||
// It is safe to call multiple times and a plugin is required to return OK if
|
||||
// the volume has been unstaged or was never staged on the node.
|
||||
func (v *volumeManager) unstageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions) error {
|
||||
func (v *volumeManager) unstageVolume(ctx context.Context, volID string, usage *UsageOptions) error {
|
||||
logger := hclog.FromContext(ctx)
|
||||
logger.Trace("Unstaging volume")
|
||||
stagingPath := v.stagingDirForVolume(v.containerMountPoint, vol, usage)
|
||||
stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)
|
||||
|
||||
// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
|
||||
// codes.ResourceExhausted are retried; all other errors are fatal.
|
||||
return v.plugin.NodeUnstageVolume(ctx,
|
||||
vol.ID,
|
||||
volID,
|
||||
stagingPath,
|
||||
grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
|
||||
grpc_retry.WithMax(3),
|
||||
|
@ -288,20 +288,25 @@ func combineErrors(maybeErrs ...error) error {
|
|||
return result.ErrorOrNil()
|
||||
}
|
||||
|
||||
func (v *volumeManager) unpublishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) error {
|
||||
pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, vol, alloc, usage)
|
||||
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, allocID string, usage *UsageOptions) error {
|
||||
pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, volID, allocID, usage)
|
||||
|
||||
// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
|
||||
// codes.ResourceExhausted are retried; all other errors are fatal.
|
||||
rpcErr := v.plugin.NodeUnpublishVolume(ctx, vol.ID, pluginTargetPath,
|
||||
rpcErr := v.plugin.NodeUnpublishVolume(ctx, volID, pluginTargetPath,
|
||||
grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
|
||||
grpc_retry.WithMax(3),
|
||||
grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
|
||||
)
|
||||
|
||||
hostTargetPath := v.allocDirForVolume(v.mountRoot, vol, alloc, usage)
|
||||
hostTargetPath := v.allocDirForVolume(v.mountRoot, volID, allocID, usage)
|
||||
if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
|
||||
// Host Target Path already got destroyed, just return any rpcErr
|
||||
if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
|
||||
// host target path was already destroyed, nothing to do here.
|
||||
// this helps us in the case that a previous GC attempt cleaned
|
||||
// up the volume on the node but the controller RPCs failed
|
||||
return nil
|
||||
}
|
||||
return rpcErr
|
||||
}
|
||||
|
||||
|
@ -320,24 +325,23 @@ func (v *volumeManager) unpublishVolume(ctx context.Context, vol *structs.CSIVol
|
|||
return rpcErr
|
||||
}
|
||||
|
||||
func (v *volumeManager) UnmountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (err error) {
|
||||
logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
|
||||
func (v *volumeManager) UnmountVolume(ctx context.Context, volID, allocID string, usage *UsageOptions) (err error) {
|
||||
logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
|
||||
ctx = hclog.WithContext(ctx, logger)
|
||||
|
||||
err = v.unpublishVolume(ctx, vol, alloc, usage)
|
||||
err = v.unpublishVolume(ctx, volID, allocID, usage)
|
||||
|
||||
if err == nil {
|
||||
canRelease := v.usageTracker.Free(alloc, vol, usage)
|
||||
canRelease := v.usageTracker.Free(allocID, volID, usage)
|
||||
if v.requiresStaging && canRelease {
|
||||
err = v.unstageVolume(ctx, vol, usage)
|
||||
err = v.unstageVolume(ctx, volID, usage)
|
||||
}
|
||||
}
|
||||
|
||||
event := structs.NewNodeEvent().
|
||||
SetSubsystem(structs.NodeEventSubsystemStorage).
|
||||
SetMessage("Unmount volume").
|
||||
AddDetail("volume_namespace", vol.Namespace).
|
||||
AddDetail("volume_id", vol.ID)
|
||||
AddDetail("volume_id", volID)
|
||||
if err == nil {
|
||||
event.AddDetail("success", "true")
|
||||
} else {
|
||||
|
|
|
@ -83,7 +83,7 @@ func TestVolumeManager_ensureStagingDir(t *testing.T) {
|
|||
csiFake := &csifake.Client{}
|
||||
eventer := func(e *structs.NodeEvent) {}
|
||||
manager := newVolumeManager(testlog.HCLogger(t), eventer, csiFake, tmpPath, tmpPath, true)
|
||||
expectedStagingPath := manager.stagingDirForVolume(tmpPath, tc.Volume, tc.UsageOptions)
|
||||
expectedStagingPath := manager.stagingDirForVolume(tmpPath, tc.Volume.ID, tc.UsageOptions)
|
||||
|
||||
if tc.CreateDirAheadOfTime {
|
||||
err := os.MkdirAll(expectedStagingPath, 0700)
|
||||
|
@ -236,7 +236,7 @@ func TestVolumeManager_unstageVolume(t *testing.T) {
|
|||
manager := newVolumeManager(testlog.HCLogger(t), eventer, csiFake, tmpPath, tmpPath, true)
|
||||
ctx := context.Background()
|
||||
|
||||
err := manager.unstageVolume(ctx, tc.Volume, tc.UsageOptions)
|
||||
err := manager.unstageVolume(ctx, tc.Volume.ID, tc.UsageOptions)
|
||||
|
||||
if tc.ExpectedErr != nil {
|
||||
require.EqualError(t, err, tc.ExpectedErr.Error())
|
||||
|
@ -416,7 +416,7 @@ func TestVolumeManager_unpublishVolume(t *testing.T) {
|
|||
manager := newVolumeManager(testlog.HCLogger(t), eventer, csiFake, tmpPath, tmpPath, true)
|
||||
ctx := context.Background()
|
||||
|
||||
err := manager.unpublishVolume(ctx, tc.Volume, tc.Allocation, tc.UsageOptions)
|
||||
err := manager.unpublishVolume(ctx, tc.Volume.ID, tc.Allocation.ID, tc.UsageOptions)
|
||||
|
||||
if tc.ExpectedErr != nil {
|
||||
require.EqualError(t, err, tc.ExpectedErr.Error())
|
||||
|
@ -460,7 +460,6 @@ func TestVolumeManager_MountVolumeEvents(t *testing.T) {
|
|||
require.Equal(t, "Mount volume", e.Message)
|
||||
require.Equal(t, "Storage", e.Subsystem)
|
||||
require.Equal(t, "vol", e.Details["volume_id"])
|
||||
require.Equal(t, "ns", e.Details["volume_namespace"])
|
||||
require.Equal(t, "false", e.Details["success"])
|
||||
require.Equal(t, "Unknown volume attachment mode: ", e.Details["error"])
|
||||
events = events[1:]
|
||||
|
@ -474,11 +473,10 @@ func TestVolumeManager_MountVolumeEvents(t *testing.T) {
|
|||
require.Equal(t, "Mount volume", e.Message)
|
||||
require.Equal(t, "Storage", e.Subsystem)
|
||||
require.Equal(t, "vol", e.Details["volume_id"])
|
||||
require.Equal(t, "ns", e.Details["volume_namespace"])
|
||||
require.Equal(t, "true", e.Details["success"])
|
||||
events = events[1:]
|
||||
|
||||
err = manager.UnmountVolume(ctx, vol, alloc, usage)
|
||||
err = manager.UnmountVolume(ctx, vol.ID, alloc.ID, usage)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, 1, len(events))
|
||||
|
@ -486,6 +484,5 @@ func TestVolumeManager_MountVolumeEvents(t *testing.T) {
|
|||
require.Equal(t, "Unmount volume", e.Message)
|
||||
require.Equal(t, "Storage", e.Subsystem)
|
||||
require.Equal(t, "vol", e.Details["volume_id"])
|
||||
require.Equal(t, "ns", e.Details["volume_namespace"])
|
||||
require.Equal(t, "true", e.Details["success"])
|
||||
}
@@ -20,11 +20,11 @@ import (

// rpcEndpoints holds the RPC endpoints
type rpcEndpoints struct {
ClientStats *ClientStats
CSIController *CSIController
FileSystem *FileSystem
Allocations *Allocations
Agent *Agent
ClientStats *ClientStats
CSI *CSI
FileSystem *FileSystem
Allocations *Allocations
Agent *Agent
}

// ClientRPC is used to make a local, client only RPC call

@@ -218,7 +218,7 @@ func (c *Client) streamingRpcConn(server *servers.Server, method string) (net.Co
func (c *Client) setupClientRpc() {
// Initialize the RPC handlers
c.endpoints.ClientStats = &ClientStats{c}
c.endpoints.CSIController = &CSIController{c}
c.endpoints.CSI = &CSI{c}
c.endpoints.FileSystem = NewFileSystemEndpoint(c)
c.endpoints.Allocations = NewAllocationsEndpoint(c)
c.endpoints.Agent = NewAgentEndpoint(c)

@@ -236,7 +236,7 @@ func (c *Client) setupClientRpc() {
func (c *Client) setupClientRpcServer(server *rpc.Server) {
// Register the endpoints
server.Register(c.endpoints.ClientStats)
server.Register(c.endpoints.CSIController)
server.Register(c.endpoints.CSI)
server.Register(c.endpoints.FileSystem)
server.Register(c.endpoints.Allocations)
server.Register(c.endpoints.Agent)
@@ -87,6 +87,9 @@ func (c *ClientCSIControllerAttachVolumeRequest) ToCSIRequest() (*csi.Controller
}, nil
}

// ClientCSIControllerDetachVolumeRequest is the RPC made from the server to
// a Nomad client to tell a CSI controller plugin on that client to perform
// ControllerUnpublish for a volume on a specific client.
type ClientCSIControllerAttachVolumeResponse struct {
// Opaque static publish properties of the volume. SP MAY use this
// field to ensure subsequent `NodeStageVolume` or `NodePublishVolume`

@@ -129,3 +132,21 @@ func (c *ClientCSIControllerDetachVolumeRequest) ToCSIRequest() *csi.ControllerU
}

type ClientCSIControllerDetachVolumeResponse struct{}

// ClientCSINodeDetachVolumeRequest is the RPC made from the server to
// a Nomad client to tell a CSI node plugin on that client to perform
// NodeUnpublish and NodeUnstage.
type ClientCSINodeDetachVolumeRequest struct {
PluginID string // ID of the plugin that manages the volume (required)
VolumeID string // ID of the volume to be unpublished (required)
AllocID string // ID of the allocation we're unpublishing for (required)
NodeID string // ID of the Nomad client targeted

// These fields should match the original volume request so that
// we can find the mount points on the client
AttachmentMode structs.CSIVolumeAttachmentMode
AccessMode structs.CSIVolumeAccessMode
ReadOnly bool
}

type ClientCSINodeDetachVolumeResponse struct{}
@ -3,21 +3,25 @@ package nomad
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// ClientCSIController is used to forward RPC requests to the targed Nomad client's
|
||||
// ClientCSI is used to forward RPC requests to the targed Nomad client's
|
||||
// CSIController endpoint.
|
||||
type ClientCSIController struct {
|
||||
type ClientCSI struct {
|
||||
srv *Server
|
||||
logger log.Logger
|
||||
}
|
||||
|
||||
func (a *ClientCSIController) AttachVolume(args *cstructs.ClientCSIControllerAttachVolumeRequest, reply *cstructs.ClientCSIControllerAttachVolumeResponse) error {
|
||||
func (a *ClientCSI) ControllerAttachVolume(args *cstructs.ClientCSIControllerAttachVolumeRequest, reply *cstructs.ClientCSIControllerAttachVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"nomad", "client_csi_controller", "attach_volume"}, time.Now())
|
||||
|
||||
// Verify the arguments.
|
||||
|
@ -39,18 +43,18 @@ func (a *ClientCSIController) AttachVolume(args *cstructs.ClientCSIControllerAtt
|
|||
// Get the connection to the client
|
||||
state, ok := a.srv.getNodeConn(args.ControllerNodeID)
|
||||
if !ok {
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSIController.AttachVolume", args, reply)
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSI.ControllerAttachVolume", args, reply)
|
||||
}
|
||||
|
||||
// Make the RPC
|
||||
err = NodeRpc(state.Session, "CSIController.AttachVolume", args, reply)
|
||||
err = NodeRpc(state.Session, "CSI.ControllerAttachVolume", args, reply)
|
||||
if err != nil {
|
||||
return fmt.Errorf("attach volume: %v", err)
|
||||
return fmt.Errorf("controller attach volume: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *ClientCSIController) ValidateVolume(args *cstructs.ClientCSIControllerValidateVolumeRequest, reply *cstructs.ClientCSIControllerValidateVolumeResponse) error {
|
||||
func (a *ClientCSI) ControllerValidateVolume(args *cstructs.ClientCSIControllerValidateVolumeRequest, reply *cstructs.ClientCSIControllerValidateVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"nomad", "client_csi_controller", "validate_volume"}, time.Now())
|
||||
|
||||
// Verify the arguments.
|
||||
|
@ -72,18 +76,18 @@ func (a *ClientCSIController) ValidateVolume(args *cstructs.ClientCSIControllerV
|
|||
// Get the connection to the client
|
||||
state, ok := a.srv.getNodeConn(args.ControllerNodeID)
|
||||
if !ok {
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSIController.ValidateVolume", args, reply)
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSI.ControllerValidateVolume", args, reply)
|
||||
}
|
||||
|
||||
// Make the RPC
|
||||
err = NodeRpc(state.Session, "CSIController.ValidateVolume", args, reply)
|
||||
err = NodeRpc(state.Session, "CSI.ControllerValidateVolume", args, reply)
|
||||
if err != nil {
|
||||
return fmt.Errorf("validate volume: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *ClientCSIController) DetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error {
|
||||
func (a *ClientCSI) ControllerDetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"nomad", "client_csi_controller", "detach_volume"}, time.Now())
|
||||
|
||||
// Verify the arguments.
|
||||
|
@ -105,14 +109,114 @@ func (a *ClientCSIController) DetachVolume(args *cstructs.ClientCSIControllerDet
|
|||
// Get the connection to the client
|
||||
state, ok := a.srv.getNodeConn(args.ControllerNodeID)
|
||||
if !ok {
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSIController.DetachVolume", args, reply)
|
||||
return findNodeConnAndForward(a.srv, args.ControllerNodeID, "ClientCSI.ControllerDetachVolume", args, reply)
|
||||
}
|
||||
|
||||
// Make the RPC
|
||||
err = NodeRpc(state.Session, "CSIController.DetachVolume", args, reply)
|
||||
err = NodeRpc(state.Session, "CSI.ControllerDetachVolume", args, reply)
|
||||
if err != nil {
|
||||
return fmt.Errorf("detach volume: %v", err)
|
||||
return fmt.Errorf("controller detach volume: %v", err)
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (a *ClientCSI) NodeDetachVolume(args *cstructs.ClientCSINodeDetachVolumeRequest, reply *cstructs.ClientCSINodeDetachVolumeResponse) error {
|
||||
defer metrics.MeasureSince([]string{"nomad", "client_csi_node", "detach_volume"}, time.Now())
|
||||
|
||||
// Make sure Node is valid and new enough to support RPC
|
||||
snap, err := a.srv.State().Snapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = getNodeForRpc(snap, args.NodeID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get the connection to the client
|
||||
state, ok := a.srv.getNodeConn(args.NodeID)
|
||||
if !ok {
|
||||
return findNodeConnAndForward(a.srv, args.NodeID, "ClientCSI.NodeDetachVolume", args, reply)
|
||||
}
|
||||
|
||||
// Make the RPC
|
||||
err = NodeRpc(state.Session, "CSI.NodeDetachVolume", args, reply)
|
||||
if err != nil {
|
||||
return fmt.Errorf("node detach volume: %v", err)
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (srv *Server) volAndPluginLookup(namespace, volID string) (*structs.CSIPlugin, *structs.CSIVolume, error) {
|
||||
state := srv.fsm.State()
|
||||
ws := memdb.NewWatchSet()
|
||||
|
||||
vol, err := state.CSIVolumeByID(ws, namespace, volID)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if vol == nil {
|
||||
return nil, nil, fmt.Errorf("volume not found: %s", volID)
|
||||
}
|
||||
if !vol.ControllerRequired {
|
||||
return nil, vol, nil
|
||||
}
|
||||
|
||||
// note: we do this same lookup in CSIVolumeByID but then throw
|
||||
// away the pointer to the plugin rather than attaching it to
|
||||
// the volume so we have to do it again here.
|
||||
plug, err := state.CSIPluginByID(ws, vol.PluginID)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if plug == nil {
|
||||
return nil, nil, fmt.Errorf("plugin not found: %s", vol.PluginID)
|
||||
}
|
||||
return plug, vol, nil
|
||||
}
|
||||
|
||||
// nodeForControllerPlugin returns the node ID for a random controller
|
||||
// to load-balance long-blocking RPCs across client nodes.
|
||||
func nodeForControllerPlugin(state *state.StateStore, plugin *structs.CSIPlugin) (string, error) {
|
||||
count := len(plugin.Controllers)
|
||||
if count == 0 {
|
||||
return "", fmt.Errorf("no controllers available for plugin %q", plugin.ID)
|
||||
}
|
||||
snap, err := state.Snapshot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// iterating maps is "random" but unspecified and isn't particularly
|
||||
// random with small maps, so not well-suited for load balancing.
|
||||
// so we shuffle the keys and iterate over them.
|
||||
clientIDs := make([]string, count)
|
||||
for clientID := range plugin.Controllers {
|
||||
clientIDs = append(clientIDs, clientID)
|
||||
}
|
||||
rand.Shuffle(count, func(i, j int) {
|
||||
clientIDs[i], clientIDs[j] = clientIDs[j], clientIDs[i]
|
||||
})
|
||||
|
||||
for _, clientID := range clientIDs {
|
||||
controller := plugin.Controllers[clientID]
|
||||
if !controller.IsController() {
|
||||
// we don't have separate types for CSIInfo depending on
|
||||
// whether it's a controller or node. this error shouldn't
|
||||
// make it to production but is to aid developers during
|
||||
// development
|
||||
err = fmt.Errorf("plugin is not a controller")
|
||||
continue
|
||||
}
|
||||
_, err = getNodeForRpc(snap, clientID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
return clientID, nil
|
||||
}
|
||||
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ func TestClientCSIController_AttachVolume_Local(t *testing.T) {
|
|||
|
||||
// Fetch the response
|
||||
var resp structs.GenericResponse
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSIController.AttachVolume", req, &resp)
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerAttachVolume", req, &resp)
|
||||
require.NotNil(err)
|
||||
// Should recieve an error from the client endpoint
|
||||
require.Contains(err.Error(), "must specify plugin name to dispense")
|
||||
|
@ -84,7 +84,7 @@ func TestClientCSIController_AttachVolume_Forwarded(t *testing.T) {
|
|||
|
||||
// Fetch the response
|
||||
var resp structs.GenericResponse
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSIController.AttachVolume", req, &resp)
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerAttachVolume", req, &resp)
|
||||
require.NotNil(err)
|
||||
// Should recieve an error from the client endpoint
|
||||
require.Contains(err.Error(), "must specify plugin name to dispense")
|
||||
|
@ -118,7 +118,7 @@ func TestClientCSIController_DetachVolume_Local(t *testing.T) {
|
|||
|
||||
// Fetch the response
|
||||
var resp structs.GenericResponse
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSIController.DetachVolume", req, &resp)
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerDetachVolume", req, &resp)
|
||||
require.NotNil(err)
|
||||
// Should recieve an error from the client endpoint
|
||||
require.Contains(err.Error(), "must specify plugin name to dispense")
|
||||
|
@ -162,7 +162,7 @@ func TestClientCSIController_DetachVolume_Forwarded(t *testing.T) {
|
|||
|
||||
// Fetch the response
|
||||
var resp structs.GenericResponse
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSIController.DetachVolume", req, &resp)
|
||||
err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerDetachVolume", req, &resp)
|
||||
require.NotNil(err)
|
||||
// Should recieve an error from the client endpoint
|
||||
require.Contains(err.Error(), "must specify plugin name to dispense")
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
memdb "github.com/hashicorp/go-memdb"
|
||||
multierror "github.com/hashicorp/go-multierror"
|
||||
version "github.com/hashicorp/go-version"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/scheduler"
|
||||
|
@ -735,8 +736,7 @@ func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error {
|
|||
"job", jobID)
|
||||
return nil
|
||||
}
|
||||
c.volumeClaimReap([]*structs.Job{job}, eval.LeaderACL)
|
||||
return nil
|
||||
return c.volumeClaimReap([]*structs.Job{job}, eval.LeaderACL)
|
||||
}
|
||||
|
||||
// volumeClaimReap contacts the leader and releases volume claims from
|
||||
|
@ -769,69 +769,144 @@ func (c *CoreScheduler) volumeClaimReap(jobs []*structs.Job, leaderACL string) e
|
|||
continue
|
||||
}
|
||||
|
||||
gcAllocs := []string{} // alloc IDs
|
||||
claimedNodes := map[string]struct{}{}
|
||||
knownNodes := []string{}
|
||||
|
||||
collectFunc := func(allocs map[string]*structs.Allocation) {
|
||||
for _, alloc := range allocs {
|
||||
// we call denormalize on the volume above to populate
|
||||
// Allocation pointers. But the alloc might have been
|
||||
// garbage collected concurrently, so if the alloc is
|
||||
// still nil we can safely skip it.
|
||||
if alloc == nil {
|
||||
continue
|
||||
}
|
||||
knownNodes = append(knownNodes, alloc.NodeID)
|
||||
if !alloc.Terminated() {
|
||||
// if there are any unterminated allocs, we
|
||||
// don't want to unpublish the volume, just
|
||||
// release the alloc's claim
|
||||
claimedNodes[alloc.NodeID] = struct{}{}
|
||||
continue
|
||||
}
|
||||
gcAllocs = append(gcAllocs, alloc.ID)
|
||||
}
|
||||
plug, err := c.srv.State().CSIPluginByID(ws, vol.PluginID)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
continue
|
||||
}
|
||||
|
||||
collectFunc(vol.WriteAllocs)
|
||||
collectFunc(vol.ReadAllocs)
|
||||
gcClaims, nodeClaims := collectClaimsToGCImpl(vol)
|
||||
|
||||
req := &structs.CSIVolumeClaimRequest{
|
||||
VolumeID: volID,
|
||||
AllocationID: "", // controller unpublish never uses this field
|
||||
Claim: structs.CSIVolumeClaimRelease,
|
||||
WriteRequest: structs.WriteRequest{
|
||||
Region: job.Region,
|
||||
Namespace: job.Namespace,
|
||||
AuthToken: leaderACL,
|
||||
},
|
||||
}
|
||||
|
||||
// we only emit the controller unpublish if no other allocs
|
||||
// on the node need it, but we also only want to make this
|
||||
// call at most once per node
|
||||
for _, node := range knownNodes {
|
||||
if _, isClaimed := claimedNodes[node]; isClaimed {
|
||||
continue
|
||||
}
|
||||
err = c.srv.controllerUnpublishVolume(req, node)
|
||||
for _, claim := range gcClaims {
|
||||
nodeClaims, err = volumeClaimReapImpl(c.srv,
|
||||
&volumeClaimReapArgs{
|
||||
vol: vol,
|
||||
plug: plug,
|
||||
allocID: claim.allocID,
|
||||
nodeID: claim.nodeID,
|
||||
mode: claim.mode,
|
||||
region: job.Region,
|
||||
namespace: job.Namespace,
|
||||
leaderACL: leaderACL,
|
||||
nodeClaims: nodeClaims,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for _, allocID := range gcAllocs {
|
||||
req.AllocationID = allocID
|
||||
err = c.srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
|
||||
if err != nil {
|
||||
c.logger.Error("volume claim release failed", "error", err)
|
||||
result = multierror.Append(result, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result.ErrorOrNil()
|
||||
}
|
||||
|
||||
type gcClaimRequest struct {
|
||||
allocID string
|
||||
nodeID string
|
||||
mode structs.CSIVolumeClaimMode
|
||||
}
|
||||
|
||||
func collectClaimsToGCImpl(vol *structs.CSIVolume) ([]gcClaimRequest, map[string]int) {
|
||||
gcAllocs := []gcClaimRequest{}
|
||||
nodeClaims := map[string]int{} // node IDs -> count
|
||||
|
||||
collectFunc := func(allocs map[string]*structs.Allocation,
|
||||
mode structs.CSIVolumeClaimMode) {
|
||||
for _, alloc := range allocs {
|
||||
// we call denormalize on the volume above to populate
|
||||
// Allocation pointers. But the alloc might have been
|
||||
// garbage collected concurrently, so if the alloc is
|
||||
// still nil we can safely skip it.
|
||||
if alloc == nil {
|
||||
continue
|
||||
}
|
||||
nodeClaims[alloc.NodeID]++
|
||||
if alloc.Terminated() {
|
||||
gcAllocs = append(gcAllocs, gcClaimRequest{
|
||||
allocID: alloc.ID,
|
||||
nodeID: alloc.NodeID,
|
||||
mode: mode,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collectFunc(vol.WriteAllocs, structs.CSIVolumeClaimWrite)
|
||||
collectFunc(vol.ReadAllocs, structs.CSIVolumeClaimRead)
|
||||
return gcAllocs, nodeClaims
|
||||
}
|
||||
|
||||
type volumeClaimReapArgs struct {
|
||||
vol *structs.CSIVolume
|
||||
plug *structs.CSIPlugin
|
||||
allocID string
|
||||
nodeID string
|
||||
mode structs.CSIVolumeClaimMode
|
||||
region string
|
||||
namespace string
|
||||
leaderACL string
|
||||
nodeClaims map[string]int // node IDs -> count
|
||||
}
|
||||
|
||||
func volumeClaimReapImpl(srv RPCServer, args *volumeClaimReapArgs) (map[string]int, error) {
|
||||
vol := args.vol
|
||||
nodeID := args.nodeID
|
||||
|
||||
// (1) NodePublish / NodeUnstage must be completed before controller
|
||||
// operations or releasing the claim.
|
||||
nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: args.plug.ID,
|
||||
VolumeID: vol.RemoteID(),
|
||||
AllocID: args.allocID,
|
||||
NodeID: nodeID,
|
||||
AttachmentMode: vol.AttachmentMode,
|
||||
AccessMode: vol.AccessMode,
|
||||
ReadOnly: args.mode == structs.CSIVolumeClaimRead,
|
||||
}
|
||||
err := srv.RPC("ClientCSI.NodeDetachVolume", nReq,
|
||||
&cstructs.ClientCSINodeDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
args.nodeClaims[nodeID]--
|
||||
|
||||
// (2) we only emit the controller unpublish if no other allocs
|
||||
// on the node need it, but we also only want to make this
|
||||
// call at most once per node
|
||||
if vol.ControllerRequired && args.nodeClaims[nodeID] < 1 {
|
||||
controllerNodeID, err := nodeForControllerPlugin(srv.State(), args.plug)
|
||||
if err != nil || nodeID == "" {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
|
||||
VolumeID: vol.RemoteID(),
|
||||
ClientCSINodeID: nodeID,
|
||||
}
|
||||
cReq.PluginID = args.plug.ID
|
||||
cReq.ControllerNodeID = controllerNodeID
|
||||
err = srv.RPC("ClientCSI.ControllerDetachVolume", cReq,
|
||||
&cstructs.ClientCSIControllerDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
}
|
||||
|
||||
// (3) release the claim from the state store, allowing it to be rescheduled
|
||||
req := &structs.CSIVolumeClaimRequest{
|
||||
VolumeID: vol.ID,
|
||||
AllocationID: args.allocID,
|
||||
Claim: structs.CSIVolumeClaimRelease,
|
||||
WriteRequest: structs.WriteRequest{
|
||||
Region: args.region,
|
||||
Namespace: args.namespace,
|
||||
AuthToken: args.leaderACL,
|
||||
},
|
||||
}
|
||||
err = srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
return args.nodeClaims, nil
|
||||
}
|
||||
|
|
|
@ -6,8 +6,10 @@ import (
|
|||
"time"
|
||||
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|

@ -2194,124 +2196,15 @@ func TestAllocation_GCEligible(t *testing.T) {
require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
}

func TestCSI_GCVolumeClaims(t *testing.T) {
func TestCSI_GCVolumeClaims_Collection(t *testing.T) {
t.Parallel()
srv, shutdown := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)

state := srv.fsm.State()
ws := memdb.NewWatchSet()

// Create a client node, plugin, and volume
node := mock.Node()
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
node.CSINodePlugins = map[string]*structs.CSIInfo{
"csi-plugin-example": {PluginID: "csi-plugin-example",
Healthy: true,
NodeInfo: &structs.CSINodeInfo{},
},
}
err := state.UpsertNode(99, node)
require.NoError(t, err)
volId0 := uuid.Generate()
ns := structs.DefaultNamespace
vols := []*structs.CSIVolume{{
ID: volId0,
Namespace: ns,
PluginID: "csi-plugin-example",
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
}}
err = state.CSIVolumeRegister(100, vols)
require.NoError(t, err)
vol, err := state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.Len(t, vol.ReadAllocs, 0)
require.Len(t, vol.WriteAllocs, 0)

// Create a job with 2 allocations
job := mock.Job()
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
"_": {
Name: "someVolume",
Type: structs.VolumeTypeCSI,
Source: volId0,
ReadOnly: false,
},
}
err = state.UpsertJob(101, job)
require.NoError(t, err)

alloc1 := mock.Alloc()
alloc1.JobID = job.ID
alloc1.NodeID = node.ID
err = state.UpsertJobSummary(102, mock.JobSummary(alloc1.JobID))
require.NoError(t, err)
alloc1.TaskGroup = job.TaskGroups[0].Name

alloc2 := mock.Alloc()
alloc2.JobID = job.ID
alloc2.NodeID = node.ID
err = state.UpsertJobSummary(103, mock.JobSummary(alloc2.JobID))
require.NoError(t, err)
alloc2.TaskGroup = job.TaskGroups[0].Name

err = state.UpsertAllocs(104, []*structs.Allocation{alloc1, alloc2})
require.NoError(t, err)

// Claim the volumes and verify the claims were set
err = state.CSIVolumeClaim(105, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
require.NoError(t, err)
err = state.CSIVolumeClaim(106, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
require.NoError(t, err)
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.Len(t, vol.ReadAllocs, 1)
require.Len(t, vol.WriteAllocs, 1)

// Update the 1st alloc as failed/terminated
alloc1.ClientStatus = structs.AllocClientStatusFailed
err = state.UpdateAllocsFromClient(107, []*structs.Allocation{alloc1})
require.NoError(t, err)

// Create the GC eval we'd get from Node.UpdateAlloc
now := time.Now().UTC()
eval := &structs.Evaluation{
ID: uuid.Generate(),
Namespace: job.Namespace,
Priority: structs.CoreJobPriority,
Type: structs.JobTypeCore,
TriggeredBy: structs.EvalTriggerAllocStop,
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + job.ID,
LeaderACL: srv.getLeaderAcl(),
Status: structs.EvalStatusPending,
CreateTime: now.UTC().UnixNano(),
ModifyTime: now.UTC().UnixNano(),
}

// Process the eval
snap, err := state.Snapshot()
require.NoError(t, err)
core := NewCoreScheduler(srv, snap)
err = core.Process(eval)
require.NoError(t, err)

// Verify the claim was released
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.Len(t, vol.ReadAllocs, 1)
require.Len(t, vol.WriteAllocs, 0)
}

func TestCSI_GCVolumeClaims_Controller(t *testing.T) {
t.Parallel()
srv, shutdown := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdown()
srv, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdownSrv()
testutil.WaitForLeader(t, srv.RPC)

state := srv.fsm.State()
ws := memdb.NewWatchSet()
index := uint64(100)

// Create a client node, plugin, and volume
node := mock.Node()

@ -2348,7 +2241,9 @@ func TestCSI_GCVolumeClaims_Controller(t *testing.T) {
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
}}
err = state.CSIVolumeRegister(100, vols)

err = state.CSIVolumeRegister(index, vols)
index++
require.NoError(t, err)
vol, err := state.CSIVolumeByID(ws, ns, volId0)

@ -2367,20 +2262,23 @@ func TestCSI_GCVolumeClaims_Controller(t *testing.T) {
ReadOnly: false,
},
}
err = state.UpsertJob(101, job)
err = state.UpsertJob(index, job)
index++
require.NoError(t, err)

alloc1 := mock.Alloc()
alloc1.JobID = job.ID
alloc1.NodeID = node.ID
err = state.UpsertJobSummary(102, mock.JobSummary(alloc1.JobID))
err = state.UpsertJobSummary(index, mock.JobSummary(alloc1.JobID))
index++
require.NoError(t, err)
alloc1.TaskGroup = job.TaskGroups[0].Name

alloc2 := mock.Alloc()
alloc2.JobID = job.ID
alloc2.NodeID = node.ID
err = state.UpsertJobSummary(103, mock.JobSummary(alloc2.JobID))
err = state.UpsertJobSummary(index, mock.JobSummary(alloc2.JobID))
index++
require.NoError(t, err)
alloc2.TaskGroup = job.TaskGroups[0].Name

@ -2388,9 +2286,11 @@ func TestCSI_GCVolumeClaims_Controller(t *testing.T) {
require.NoError(t, err)

// Claim the volumes and verify the claims were set
err = state.CSIVolumeClaim(105, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
err = state.CSIVolumeClaim(index, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
index++
require.NoError(t, err)
err = state.CSIVolumeClaim(106, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
err = state.CSIVolumeClaim(index, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
index++
require.NoError(t, err)
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)

@ -2400,34 +2300,165 @@ func TestCSI_GCVolumeClaims_Controller(t *testing.T) {
// Update both allocs as failed/terminated
alloc1.ClientStatus = structs.AllocClientStatusFailed
alloc2.ClientStatus = structs.AllocClientStatusFailed
err = state.UpdateAllocsFromClient(107, []*structs.Allocation{alloc1, alloc2})
err = state.UpdateAllocsFromClient(index, []*structs.Allocation{alloc1, alloc2})
require.NoError(t, err)

// Create the GC eval we'd get from Node.UpdateAlloc
now := time.Now().UTC()
eval := &structs.Evaluation{
ID: uuid.Generate(),
Namespace: job.Namespace,
Priority: structs.CoreJobPriority,
Type: structs.JobTypeCore,
TriggeredBy: structs.EvalTriggerAllocStop,
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + job.ID,
LeaderACL: srv.getLeaderAcl(),
Status: structs.EvalStatusPending,
CreateTime: now.UTC().UnixNano(),
ModifyTime: now.UTC().UnixNano(),
vol, err = state.CSIVolumeDenormalize(ws, vol)
require.NoError(t, err)

gcClaims, nodeClaims := collectClaimsToGCImpl(vol)
require.Equal(t, nodeClaims[node.ID], 2)
require.Len(t, gcClaims, 2)
}

func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
t.Parallel()
require := require.New(t)

s, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdownSrv()
testutil.WaitForLeader(t, s.RPC)

node := mock.Node()
plugin := mock.CSIPlugin()
vol := mock.CSIVolume(plugin)
alloc := mock.Alloc()

cases := []struct {
Name string
Claim gcClaimRequest
ClaimsCount map[string]int
ControllerRequired bool
ExpectedErr string
ExpectedCount int
ExpectedClaimsCount int
ExpectedNodeDetachVolumeCount int
ExpectedControllerDetachVolumeCount int
ExpectedVolumeClaimCount int
srv *MockRPCServer
}{
{
Name: "NodeDetachVolume fails",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: true,
ExpectedErr: "node plugin missing",
ExpectedClaimsCount: 1,
ExpectedNodeDetachVolumeCount: 1,
srv: &MockRPCServer{
state: s.State(),
nextCSINodeDetachVolumeError: fmt.Errorf("node plugin missing"),
},
},
{
Name: "ControllerDetachVolume no controllers",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: true,
ExpectedErr: fmt.Sprintf(
"no controllers available for plugin %q", plugin.ID),
ExpectedClaimsCount: 0,
ExpectedNodeDetachVolumeCount: 1,
ExpectedControllerDetachVolumeCount: 0,
srv: &MockRPCServer{
state: s.State(),
},
},
{
Name: "ControllerDetachVolume node-only",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: false,
ExpectedClaimsCount: 0,
ExpectedNodeDetachVolumeCount: 1,
ExpectedControllerDetachVolumeCount: 0,
ExpectedVolumeClaimCount: 1,
srv: &MockRPCServer{
state: s.State(),
},
},
}

// Process the eval
snap, err := state.Snapshot()
require.NoError(t, err)
core := NewCoreScheduler(srv, snap)
err = core.Process(eval)
require.NoError(t, err)

// Verify both claims were released
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.Len(t, vol.ReadAllocs, 0)
require.Len(t, vol.WriteAllocs, 0)
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
vol.ControllerRequired = tc.ControllerRequired
nodeClaims, err := volumeClaimReapImpl(tc.srv, &volumeClaimReapArgs{
vol: vol,
plug: plugin,
allocID: tc.Claim.allocID,
nodeID: tc.Claim.nodeID,
mode: tc.Claim.mode,
region: "global",
namespace: "default",
leaderACL: "not-in-use",
nodeClaims: tc.ClaimsCount,
})
if tc.ExpectedErr != "" {
require.EqualError(err, tc.ExpectedErr)
} else {
require.NoError(err)
}
require.Equal(tc.ExpectedClaimsCount,
nodeClaims[tc.Claim.nodeID], "expected claims")
require.Equal(tc.ExpectedNodeDetachVolumeCount,
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
require.Equal(tc.ExpectedControllerDetachVolumeCount,
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
require.Equal(tc.ExpectedVolumeClaimCount,
tc.srv.countCSIVolumeClaim, "volume claim RPC count")
})
}
}

type MockRPCServer struct {
state *state.StateStore

// mock responses for ClientCSI.NodeDetachVolume
nextCSINodeDetachVolumeResponse *cstructs.ClientCSINodeDetachVolumeResponse
nextCSINodeDetachVolumeError error
countCSINodeDetachVolume int

// mock responses for ClientCSI.ControllerDetachVolume
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
nextCSIControllerDetachVolumeError error
countCSIControllerDetachVolume int

// mock responses for CSI.VolumeClaim
nextCSIVolumeClaimResponse *structs.CSIVolumeClaimResponse
nextCSIVolumeClaimError error
countCSIVolumeClaim int
}

func (srv *MockRPCServer) RPC(method string, args interface{}, reply interface{}) error {
switch method {
case "ClientCSI.NodeDetachVolume":
reply = srv.nextCSINodeDetachVolumeResponse
srv.countCSINodeDetachVolume++
return srv.nextCSINodeDetachVolumeError
case "ClientCSI.ControllerDetachVolume":
reply = srv.nextCSIControllerDetachVolumeResponse
srv.countCSIControllerDetachVolume++
return srv.nextCSIControllerDetachVolumeError
case "CSIVolume.Claim":
reply = srv.nextCSIVolumeClaimResponse
srv.countCSIVolumeClaim++
return srv.nextCSIVolumeClaimError
default:
return fmt.Errorf("unexpected method %q passed to mock", method)
}

}

func (srv *MockRPCServer) State() *state.StateStore { return srv.state }
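
MockRPCServer scripts one response and error per RPC method and counts invocations, which is what lets the table-driven test above assert how far volumeClaimReapImpl got before failing. The same pattern in isolation, as a hypothetical single-method mock that assumes nothing about Nomad's types:

package main

import (
	"errors"
	"fmt"
)

// countingMock is a hypothetical stand-in illustrating the pattern: a scripted
// error to return on the next call, plus a counter the test can assert on.
type countingMock struct {
	nextErr error
	calls   int
}

func (m *countingMock) RPC(method string, args interface{}, reply interface{}) error {
	m.calls++
	return m.nextErr
}

func main() {
	m := &countingMock{nextErr: errors.New("node plugin missing")}
	err := m.RPC("ClientCSI.NodeDetachVolume", nil, nil)
	fmt.Println(err, m.calls) // prints: node plugin missing 1
}
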
@ -2,7 +2,6 @@ package nomad

import (
"fmt"
"math/rand"
"time"

metrics "github.com/armon/go-metrics"

@ -240,12 +239,12 @@ func (srv *Server) controllerValidateVolume(req *structs.CSIVolumeRegisterReques
// plugin IDs are not scoped to region/DC but volumes are.
// so any node we get for a controller is already in the same region/DC
// for the volume.
nodeID, err := srv.nodeForControllerPlugin(plugin)
nodeID, err := nodeForControllerPlugin(srv.fsm.State(), plugin)
if err != nil || nodeID == "" {
return err
}

method := "ClientCSIController.ValidateVolume"
method := "ClientCSI.ControllerValidateVolume"
cReq := &cstructs.ClientCSIControllerValidateVolumeRequest{
VolumeID: vol.RemoteID(),
AttachmentMode: vol.AttachmentMode,

@ -532,7 +531,7 @@ func (srv *Server) controllerPublishVolume(req *structs.CSIVolumeClaimRequest, r
// plugin IDs are not scoped to region/DC but volumes are.
// so any node we get for a controller is already in the same region/DC
// for the volume.
nodeID, err := srv.nodeForControllerPlugin(plug)
nodeID, err := nodeForControllerPlugin(state, plug)
if err != nil || nodeID == "" {
return err
}

@ -549,7 +548,7 @@ func (srv *Server) controllerPublishVolume(req *structs.CSIVolumeClaimRequest, r
return fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
}

method := "ClientCSIController.AttachVolume"
method := "ClientCSI.ControllerAttachVolume"
cReq := &cstructs.ClientCSIControllerAttachVolumeRequest{
VolumeID: vol.RemoteID(),
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,

@ -568,116 +567,3 @@ func (srv *Server) controllerPublishVolume(req *structs.CSIVolumeClaimRequest, r
resp.PublishContext = cResp.PublishContext
return nil
}

// controllerUnpublishVolume sends an unpublish request to the CSI
// controller plugin associated with a volume, if any.
// TODO: the only caller of this won't have an alloc pointer handy, should it be its own request arg type?
func (srv *Server) controllerUnpublishVolume(req *structs.CSIVolumeClaimRequest, targetNomadNodeID string) error {
plug, vol, err := srv.volAndPluginLookup(req.RequestNamespace(), req.VolumeID)
if plug == nil || vol == nil || err != nil {
return err // possibly nil if no controller required
}

ws := memdb.NewWatchSet()
state := srv.State()

targetNode, err := state.NodeByID(ws, targetNomadNodeID)
if err != nil {
return err
}
if targetNode == nil {
return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, targetNomadNodeID)
}
targetCSIInfo, ok := targetNode.CSINodePlugins[plug.ID]
if !ok {
return fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
}

// plugin IDs are not scoped to region/DC but volumes are.
// so any node we get for a controller is already in the same region/DC
// for the volume.
nodeID, err := srv.nodeForControllerPlugin(plug)
if err != nil || nodeID == "" {
return err
}

method := "ClientCSIController.DetachVolume"
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
VolumeID: vol.RemoteID(),
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
}
cReq.PluginID = plug.ID
cReq.ControllerNodeID = nodeID
return srv.RPC(method, cReq, &cstructs.ClientCSIControllerDetachVolumeResponse{})
}

func (srv *Server) volAndPluginLookup(namespace, volID string) (*structs.CSIPlugin, *structs.CSIVolume, error) {
state := srv.fsm.State()
ws := memdb.NewWatchSet()

vol, err := state.CSIVolumeByID(ws, namespace, volID)
if err != nil {
return nil, nil, err
}
if vol == nil {
return nil, nil, fmt.Errorf("volume not found: %s", volID)
}
if !vol.ControllerRequired {
return nil, vol, nil
}

// note: we do this same lookup in CSIVolumeByID but then throw
// away the pointer to the plugin rather than attaching it to
// the volume so we have to do it again here.
plug, err := state.CSIPluginByID(ws, vol.PluginID)
if err != nil {
return nil, nil, err
}
if plug == nil {
return nil, nil, fmt.Errorf("plugin not found: %s", vol.PluginID)
}
return plug, vol, nil
}

// nodeForControllerPlugin returns the node ID for a random controller
// to load-balance long-blocking RPCs across client nodes.
func (srv *Server) nodeForControllerPlugin(plugin *structs.CSIPlugin) (string, error) {
count := len(plugin.Controllers)
if count == 0 {
return "", fmt.Errorf("no controllers available for plugin %q", plugin.ID)
}
snap, err := srv.fsm.State().Snapshot()
if err != nil {
return "", err
}

// iterating maps is "random" but unspecified and isn't particularly
// random with small maps, so not well-suited for load balancing.
// so we shuffle the keys and iterate over them.
clientIDs := make([]string, count)
for clientID := range plugin.Controllers {
clientIDs = append(clientIDs, clientID)
}
rand.Shuffle(count, func(i, j int) {
clientIDs[i], clientIDs[j] = clientIDs[j], clientIDs[i]
})

for _, clientID := range clientIDs {
controller := plugin.Controllers[clientID]
if !controller.IsController() {
// we don't have separate types for CSIInfo depending on
// whether it's a controller or node. this error shouldn't
// make it to production but is to aid developers during
// development
err = fmt.Errorf("plugin is not a controller")
continue
}
_, err = getNodeForRpc(snap, clientID)
if err != nil {
continue
}
return clientID, nil
}

return "", err
}
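
The comment in nodeForControllerPlugin notes that Go's map iteration order is not a good source of randomness for load balancing, so the keys are copied out and shuffled. A minimal, self-contained illustration of that shuffle-then-scan approach, using plain strings and a boolean health check rather than plugin controllers:

package main

import (
	"fmt"
	"math/rand"
)

// pickRandomHealthy copies the map keys into a slice, shuffles the slice,
// and returns the first entry that passes the health check. This avoids
// relying on Go's unspecified (and, for small maps, not very random)
// iteration order.
func pickRandomHealthy(candidates map[string]bool) (string, error) {
	keys := make([]string, 0, len(candidates))
	for k := range candidates {
		keys = append(keys, k)
	}
	rand.Shuffle(len(keys), func(i, j int) {
		keys[i], keys[j] = keys[j], keys[i]
	})
	for _, k := range keys {
		if candidates[k] { // healthy
			return k, nil
		}
	}
	return "", fmt.Errorf("no healthy candidate")
}

func main() {
	nodes := map[string]bool{"node-a": true, "node-b": false, "node-c": true}
	id, err := pickRandomHealthy(nodes)
	fmt.Println(id, err)
}

Note that the sketch preallocates capacity rather than length: `make([]string, count)` followed by `append` would leave `count` empty strings at the front of the slice.
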
@ -638,7 +638,7 @@ func TestCSI_NodeForControllerPlugin(t *testing.T) {

plugin, err := state.CSIPluginByID(ws, "minnie")
require.NoError(t, err)
nodeID, err := srv.nodeForControllerPlugin(plugin)
nodeID, err := nodeForControllerPlugin(state, plugin)

// only node1 has both the controller and a recent Nomad version
require.Equal(t, nodeID, node1.ID)

@ -0,0 +1,11 @@
package nomad

import "github.com/hashicorp/nomad/nomad/state"

// RPCServer is a minimal interface of the Server, intended as
// an aid for testing logic surrounding server-to-server or
// server-to-client RPC calls
type RPCServer interface {
RPC(method string, args interface{}, reply interface{}) error
State() *state.StateStore
}
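
This small interface is the testing seam the rest of the changeset leans on: the claim-reaping logic takes the RPC-making side of the server as a parameter, so the tests above can pass MockRPCServer while the production path keeps passing the real server. A stripped-down, self-contained version of the same pattern, with hypothetical names:

package main

import "fmt"

// rpcServer mirrors the shape of the RPCServer interface; state access is
// omitted to keep the sketch small.
type rpcServer interface {
	RPC(method string, args interface{}, reply interface{}) error
}

// releaseClaim is a hypothetical function written against the interface
// instead of a concrete server type, so it can be unit-tested with a fake.
func releaseClaim(srv rpcServer, volumeID string) error {
	return srv.RPC("CSIVolume.Claim", volumeID, nil)
}

// fakeServer records the last method called; a real server would forward
// the call over the wire.
type fakeServer struct{ last string }

func (f *fakeServer) RPC(method string, args interface{}, reply interface{}) error {
	f.last = method
	return nil
}

func main() {
	f := &fakeServer{}
	_ = releaseClaim(f, "vol-1")
	fmt.Println(f.last) // CSIVolume.Claim
}
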
@ -262,11 +262,11 @@ type endpoints struct {
Enterprise *EnterpriseEndpoints

// Client endpoints
ClientStats *ClientStats
FileSystem *FileSystem
Agent *Agent
ClientAllocations *ClientAllocations
ClientCSIController *ClientCSIController
ClientStats *ClientStats
FileSystem *FileSystem
Agent *Agent
ClientAllocations *ClientAllocations
ClientCSI *ClientCSI
}

// NewServer is used to construct a new Nomad server from the

@ -1113,7 +1113,7 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
s.staticEndpoints.ClientStats = &ClientStats{srv: s, logger: s.logger.Named("client_stats")}
s.staticEndpoints.ClientAllocations = &ClientAllocations{srv: s, logger: s.logger.Named("client_allocs")}
s.staticEndpoints.ClientAllocations.register()
s.staticEndpoints.ClientCSIController = &ClientCSIController{srv: s, logger: s.logger.Named("client_csi")}
s.staticEndpoints.ClientCSI = &ClientCSI{srv: s, logger: s.logger.Named("client_csi")}

// Streaming endpoints
s.staticEndpoints.FileSystem = &FileSystem{srv: s, logger: s.logger.Named("client_fs")}

@ -1142,7 +1142,7 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
s.staticEndpoints.Enterprise.Register(server)
server.Register(s.staticEndpoints.ClientStats)
server.Register(s.staticEndpoints.ClientAllocations)
server.Register(s.staticEndpoints.ClientCSIController)
server.Register(s.staticEndpoints.ClientCSI)
server.Register(s.staticEndpoints.FileSystem)
server.Register(s.staticEndpoints.Agent)