package client

import (
	"context"
	"errors"
	"fmt"
	"time"

	metrics "github.com/armon/go-metrics"
	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/client/structs"
	nstructs "github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

// CSI endpoint is used for interacting with CSI plugins on a client.
// It holds a reference to the owning Client, through which it reaches the
// dynamic plugin registry, the CSI manager and the logger.
// TODO: Submit metrics with labels to allow debugging per plugin perf problems.
type CSI struct {
	c *Client
}

const (
	// CSIPluginRequestTimeout is the timeout that should be used when making reqs
	// against CSI Plugins. It is copied from Kubernetes as an initial seed value.
	// https://github.com/kubernetes/kubernetes/blob/e680ad7156f263a6d8129cc0117fda58602e50ad/pkg/volume/csi/csi_plugin.go#L52
	//
	// In this file it is used both as the deadline of the context created by
	// requestContext and as the per-retry timeout of the controller RPCs.
	CSIPluginRequestTimeout = 2 * time.Minute
)

var (
	// ErrPluginTypeError is returned by findPlugin when the plugin dispensed
	// by the dynamic registry does not implement csi.CSIPlugin.
	ErrPluginTypeError = errors.New("CSI Plugin loaded incorrectly")
)

// ControllerValidateVolume is used during volume registration to validate
// that a volume exists and that the capabilities it was registered with are
// supported by the CSI Plugin and external volume configuration.
func (c *CSI) ControllerValidateVolume(req *structs.ClientCSIControllerValidateVolumeRequest, resp *structs.ClientCSIControllerValidateVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "validate_volume"}, time.Now()) if req.VolumeID == "" { return errors.New("CSI.ControllerValidateVolume: VolumeID is required") } if req.PluginID == "" { return errors.New("CSI.ControllerValidateVolume: PluginID is required") } plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerValidateVolume: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq, err := req.ToCSIRequest() if err != nil { return fmt.Errorf("CSI.ControllerValidateVolume: %v", err) } ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ValidateVolumeCapabilities errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. err = plugin.ControllerValidateCapabilities(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerValidateVolume: %v", err) } return nil } // ControllerAttachVolume is used to attach a volume from a CSI Cluster to // the storage node provided in the request. // // The controller attachment flow currently works as follows: // 1. Validate the volume request // 2. Call ControllerPublishVolume on the CSI Plugin to trigger a remote attachment // // In the future this may be expanded to request dynamic secrets for attachment. 
func (c *CSI) ControllerAttachVolume(req *structs.ClientCSIControllerAttachVolumeRequest, resp *structs.ClientCSIControllerAttachVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "publish_volume"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerAttachVolume: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() // The following block of validation checks should not be reached on a // real Nomad cluster as all of this data should be validated when registering // volumes with the cluster. They serve as a defensive check before forwarding // requests to plugins, and to aid with development. if req.VolumeID == "" { return errors.New("CSI.ControllerAttachVolume: VolumeID is required") } if req.ClientCSINodeID == "" { return errors.New("CSI.ControllerAttachVolume: ClientCSINodeID is required") } csiReq, err := req.ToCSIRequest() if err != nil { return fmt.Errorf("CSI.ControllerAttachVolume: %v", err) } // Submit the request for a volume to the CSI Plugin. ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerPublishVolume errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. cresp, err := plugin.ControllerPublishVolume(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerAttachVolume: %v", err) } resp.PublishContext = cresp.PublishContext return nil } // ControllerDetachVolume is used to detach a volume from a CSI Cluster from // the storage node provided in the request. 
func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolumeRequest, resp *structs.ClientCSIControllerDetachVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "unpublish_volume"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerDetachVolume: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() // The following block of validation checks should not be reached on a // real Nomad cluster as all of this data should be validated when registering // volumes with the cluster. They serve as a defensive check before forwarding // requests to plugins, and to aid with development. if req.VolumeID == "" { return errors.New("CSI.ControllerDetachVolume: VolumeID is required") } if req.ClientCSINodeID == "" { return errors.New("CSI.ControllerDetachVolume: ClientCSINodeID is required") } csiReq := req.ToCSIRequest() // Submit the request for a volume to the CSI Plugin. ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerUnpublishVolume errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. _, err = plugin.ControllerUnpublishVolume(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { // if the controller detach previously happened but the server failed to // checkpoint, we'll get an error from the plugin but can safely ignore it. 
c.c.logger.Debug("could not unpublish volume", "error", err) return nil } if err != nil { return fmt.Errorf("CSI.ControllerDetachVolume: %v", err) } return err } func (c *CSI) ControllerCreateVolume(req *structs.ClientCSIControllerCreateVolumeRequest, resp *structs.ClientCSIControllerCreateVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "create_volume"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerCreateVolume: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq, err := req.ToCSIRequest() if err != nil { return fmt.Errorf("CSI.ControllerCreateVolume: %v", err) } ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerCreateVolume errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. 
cresp, err := plugin.ControllerCreateVolume(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerCreateVolume: %v", err) } if cresp == nil || cresp.Volume == nil { c.c.logger.Warn("plugin did not return error or volume; this is a bug in the plugin and should be reported to the plugin author") return fmt.Errorf("CSI.ControllerCreateVolume: plugin did not return error or volume") } resp.ExternalVolumeID = cresp.Volume.ExternalVolumeID resp.CapacityBytes = cresp.Volume.CapacityBytes resp.VolumeContext = cresp.Volume.VolumeContext return nil } func (c *CSI) ControllerDeleteVolume(req *structs.ClientCSIControllerDeleteVolumeRequest, resp *structs.ClientCSIControllerDeleteVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "delete_volume"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerDeleteVolume: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq := req.ToCSIRequest() ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerDeleteVolume errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. 
err = plugin.ControllerDeleteVolume(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { // if the volume was deleted out-of-band, we'll get an error from // the plugin but can safely ignore it c.c.logger.Debug("could not delete volume", "error", err) return nil } if err != nil { return fmt.Errorf("CSI.ControllerDeleteVolume: %v", err) } return err } func (c *CSI) ControllerListVolumes(req *structs.ClientCSIControllerListVolumesRequest, resp *structs.ClientCSIControllerListVolumesResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "list_volumes"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerListVolumes: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq := req.ToCSIRequest() ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerListVolumes errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. 
cresp, err := plugin.ControllerListVolumes(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerListVolumes: %v", err) } resp.NextToken = cresp.NextToken resp.Entries = []*nstructs.CSIVolumeExternalStub{} for _, entry := range cresp.Entries { if entry.Volume == nil { return fmt.Errorf("CSI.ControllerListVolumes: plugin returned an invalid entry") } vol := &nstructs.CSIVolumeExternalStub{ ExternalID: entry.Volume.ExternalVolumeID, CapacityBytes: entry.Volume.CapacityBytes, VolumeContext: entry.Volume.VolumeContext, CloneID: entry.Volume.ContentSource.CloneID, SnapshotID: entry.Volume.ContentSource.SnapshotID, } if entry.Status != nil { vol.PublishedExternalNodeIDs = entry.Status.PublishedNodeIds vol.IsAbnormal = entry.Status.VolumeCondition.Abnormal if entry.Status.VolumeCondition != nil { vol.Status = entry.Status.VolumeCondition.Message } } resp.Entries = append(resp.Entries, vol) if req.MaxEntries != 0 && int32(len(resp.Entries)) == req.MaxEntries { break } } return nil } func (c *CSI) ControllerCreateSnapshot(req *structs.ClientCSIControllerCreateSnapshotRequest, resp *structs.ClientCSIControllerCreateSnapshotResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "create_snapshot"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerCreateSnapshot: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq, err := req.ToCSIRequest() if err != nil { return fmt.Errorf("CSI.ControllerCreateSnapshot: %v", err) } ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerCreateSnapshot errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all 
other errors are fatal. cresp, err := plugin.ControllerCreateSnapshot(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerCreateSnapshot: %v", err) } if cresp == nil || cresp.Snapshot == nil { c.c.logger.Warn("plugin did not return error or snapshot; this is a bug in the plugin and should be reported to the plugin author") return fmt.Errorf("CSI.ControllerCreateSnapshot: plugin did not return error or snapshot") } resp.ID = cresp.Snapshot.ID resp.ExternalSourceVolumeID = cresp.Snapshot.SourceVolumeID resp.SizeBytes = cresp.Snapshot.SizeBytes resp.CreateTime = cresp.Snapshot.CreateTime resp.IsReady = cresp.Snapshot.IsReady return nil } func (c *CSI) ControllerDeleteSnapshot(req *structs.ClientCSIControllerDeleteSnapshotRequest, resp *structs.ClientCSIControllerDeleteSnapshotResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "delete_snapshot"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerDeleteSnapshot: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq := req.ToCSIRequest() ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerDeleteSnapshot errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. 
err = plugin.ControllerDeleteSnapshot(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { // if the snapshot was deleted out-of-band, we'll get an error from // the plugin but can safely ignore it c.c.logger.Debug("could not delete snapshot", "error", err) return nil } if err != nil { return fmt.Errorf("CSI.ControllerDeleteSnapshot: %v", err) } return err } func (c *CSI) ControllerListSnapshots(req *structs.ClientCSIControllerListSnapshotsRequest, resp *structs.ClientCSIControllerListSnapshotsResponse) error { defer metrics.MeasureSince([]string{"client", "csi_controller", "list_snapshots"}, time.Now()) plugin, err := c.findControllerPlugin(req.PluginID) if err != nil { // the server's view of the plugin health is stale, so let it know it // should retry with another controller instance return fmt.Errorf("CSI.ControllerListSnapshots: %w: %v", nstructs.ErrCSIClientRPCRetryable, err) } defer plugin.Close() csiReq := req.ToCSIRequest() ctx, cancelFn := c.requestContext() defer cancelFn() // CSI ControllerListSnapshots errors for timeout, codes.Unavailable and // codes.ResourceExhausted are retried; all other errors are fatal. 
cresp, err := plugin.ControllerListSnapshots(ctx, csiReq, grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), grpc_retry.WithMax(3), grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) if err != nil { return fmt.Errorf("CSI.ControllerListSnapshots: %v", err) } resp.NextToken = cresp.NextToken resp.Entries = []*nstructs.CSISnapshot{} for _, entry := range cresp.Entries { if entry.Snapshot == nil { return fmt.Errorf("CSI.ControllerListSnapshot: plugin returned an invalid entry") } snap := &nstructs.CSISnapshot{ ID: entry.Snapshot.ID, ExternalSourceVolumeID: entry.Snapshot.SourceVolumeID, SizeBytes: entry.Snapshot.SizeBytes, CreateTime: entry.Snapshot.CreateTime, IsReady: entry.Snapshot.IsReady, PluginID: req.PluginID, } resp.Entries = append(resp.Entries, snap) if req.MaxEntries != 0 && int32(len(resp.Entries)) == req.MaxEntries { break } } return nil } // NodeDetachVolume is used to detach a volume from a CSI Cluster from // the storage node provided in the request. func (c *CSI) NodeDetachVolume(req *structs.ClientCSINodeDetachVolumeRequest, resp *structs.ClientCSINodeDetachVolumeResponse) error { defer metrics.MeasureSince([]string{"client", "csi_node", "detach_volume"}, time.Now()) // The following block of validation checks should not be reached on a // real Nomad cluster. They serve as a defensive check before forwarding // requests to plugins, and to aid with development. 
if req.PluginID == "" { return errors.New("CSI.NodeDetachVolume: PluginID is required") } if req.VolumeID == "" { return errors.New("CSI.NodeDetachVolume: VolumeID is required") } if req.AllocID == "" { return errors.New("CSI.NodeDetachVolume: AllocID is required") } ctx, cancelFn := c.requestContext() defer cancelFn() mounter, err := c.c.csimanager.MounterForPlugin(ctx, req.PluginID) if err != nil { return fmt.Errorf("CSI.NodeDetachVolume: %v", err) } usageOpts := &csimanager.UsageOptions{ ReadOnly: req.ReadOnly, AttachmentMode: req.AttachmentMode, AccessMode: req.AccessMode, } err = mounter.UnmountVolume(ctx, req.VolumeID, req.ExternalID, req.AllocID, usageOpts) if err != nil && !errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { // if the unmounting previously happened but the server failed to // checkpoint, we'll get an error from Unmount but can safely // ignore it. return fmt.Errorf("CSI.NodeDetachVolume: %v", err) } return nil } func (c *CSI) findControllerPlugin(name string) (csi.CSIPlugin, error) { return c.findPlugin(dynamicplugins.PluginTypeCSIController, name) } func (c *CSI) findPlugin(ptype, name string) (csi.CSIPlugin, error) { pIface, err := c.c.dynamicRegistry.DispensePlugin(ptype, name) if err != nil { return nil, err } plugin, ok := pIface.(csi.CSIPlugin) if !ok { return nil, ErrPluginTypeError } return plugin, nil } func (c *CSI) requestContext() (context.Context, context.CancelFunc) { return context.WithTimeout(context.Background(), CSIPluginRequestTimeout) }