open-nomad/nomad/client_csi_endpoint_test.go
Tim Gross f6b3d38eb8
CSI: move node unmount to server-driven RPCs (#7596)
If a volume-claiming alloc stops and the CSI Node plugin that serves
that alloc's volumes is missing, there's no way for the allocrunner
hook to send the `NodeUnpublish` and `NodeUnstage` RPCs.

This changeset addresses this issue with a redesign of the client-side
CSI code. Rather than unmounting in the alloc runner hook, the alloc
runner hook will simply exit. When the server gets the
`Node.UpdateAlloc` for the terminal allocation that had a volume claim,
it creates a volume claim GC job. This job will make client RPCs to a
new node plugin RPC endpoint, and only once that succeeds, move on to
making the client RPCs to the controller plugin. If the node plugin is
unavailable, the GC job will fail and be requeued.
2020-04-02 16:04:56 -04:00
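A minimal sketch of that ordering follows, in Go. The interface, the request/response shapes, and the node-side RPC method name are assumptions made for illustration only; the only endpoint name taken from this file is ClientCSI.ControllerDetachVolume.

package sketch

import "fmt"

// nodeRPCer stands in for the server's ability to make an RPC to a client
// node; the real forwarding machinery lives in the Nomad server, and this
// interface is an assumption for illustration.
type nodeRPCer interface {
	RPC(method string, args, reply interface{}) error
}

// Placeholder request/response shapes; the real types live in client/structs.
type nodeDetachReq struct{ VolumeID, AllocID, NodeID string }
type controllerDetachReq struct{ VolumeID, NodeID string }
type genericResp struct{}

// gcVolumeClaim sketches the ordering the commit message describes: the node
// plugin must unpublish/unstage first, and only then is the controller plugin
// asked to detach. Any error propagates so the claim GC job fails and is
// requeued.
func gcVolumeClaim(srv nodeRPCer, volID, allocID, nodeID string) error {
	// 1. New node-plugin endpoint (this method name is a guess for
	//    illustration, not taken from the source).
	var nresp genericResp
	nreq := &nodeDetachReq{VolumeID: volID, AllocID: allocID, NodeID: nodeID}
	if err := srv.RPC("ClientCSI.NodeDetachVolume", nreq, &nresp); err != nil {
		return fmt.Errorf("node detach failed, requeueing claim GC: %w", err)
	}

	// 2. Controller-plugin detach, the endpoint family exercised by the
	//    tests below.
	var cresp genericResp
	creq := &controllerDetachReq{VolumeID: volID, NodeID: nodeID}
	if err := srv.RPC("ClientCSI.ControllerDetachVolume", creq, &cresp); err != nil {
		return fmt.Errorf("controller detach failed: %w", err)
	}
	return nil
}

The tests in this file exercise only the controller-side client RPC plumbing: with no CSI plugin registered on the test clients, each call is expected to fail with "must specify plugin name to dispense".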

package nomad

import (
	"testing"

	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hashicorp/nomad/client"
	"github.com/hashicorp/nomad/client/config"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/require"
)

func TestClientCSIController_AttachVolume_Local(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Start a server and client
	s, cleanupS := TestServer(t, nil)
	defer cleanupS()
	codec := rpcClient(t, s)
	testutil.WaitForLeader(t, s.RPC)

	c, cleanupC := client.TestClient(t, func(c *config.Config) {
		c.Servers = []string{s.config.RPCAddr.String()}
	})
	defer cleanupC()

	testutil.WaitForResult(func() (bool, error) {
		nodes := s.connectedNodes()
		return len(nodes) == 1, nil
	}, func(err error) {
		require.Fail("should have a client")
	})

	req := &cstructs.ClientCSIControllerAttachVolumeRequest{
		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
	}

	// Fetch the response
	var resp structs.GenericResponse
	err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerAttachVolume", req, &resp)
	require.NotNil(err)

	// Should receive an error from the client endpoint
	require.Contains(err.Error(), "must specify plugin name to dispense")
}

func TestClientCSIController_AttachVolume_Forwarded(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Start two servers and a client
	s1, cleanupS1 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 })
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 })
	defer cleanupS2()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)
	codec := rpcClient(t, s2)

	c, cleanupC := client.TestClient(t, func(c *config.Config) {
		c.Servers = []string{s2.config.RPCAddr.String()}
		c.GCDiskUsageThreshold = 100.0
	})
	defer cleanupC()

	testutil.WaitForResult(func() (bool, error) {
		nodes := s2.connectedNodes()
		return len(nodes) == 1, nil
	}, func(err error) {
		require.Fail("should have a client")
	})

	// Force remove the connection locally in case it exists
	s1.nodeConnsLock.Lock()
	delete(s1.nodeConns, c.NodeID())
	s1.nodeConnsLock.Unlock()

	req := &cstructs.ClientCSIControllerAttachVolumeRequest{
		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
	}

	// Fetch the response
	var resp structs.GenericResponse
	err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerAttachVolume", req, &resp)
	require.NotNil(err)

	// Should receive an error from the client endpoint
	require.Contains(err.Error(), "must specify plugin name to dispense")
}

func TestClientCSIController_DetachVolume_Local(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Start a server and client
	s, cleanupS := TestServer(t, nil)
	defer cleanupS()
	codec := rpcClient(t, s)
	testutil.WaitForLeader(t, s.RPC)

	c, cleanupC := client.TestClient(t, func(c *config.Config) {
		c.Servers = []string{s.config.RPCAddr.String()}
	})
	defer cleanupC()

	testutil.WaitForResult(func() (bool, error) {
		nodes := s.connectedNodes()
		return len(nodes) == 1, nil
	}, func(err error) {
		require.Fail("should have a client")
	})

	req := &cstructs.ClientCSIControllerDetachVolumeRequest{
		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
	}

	// Fetch the response
	var resp structs.GenericResponse
	err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerDetachVolume", req, &resp)
	require.NotNil(err)

	// Should receive an error from the client endpoint
	require.Contains(err.Error(), "must specify plugin name to dispense")
}

func TestClientCSIController_DetachVolume_Forwarded(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Start two servers and a client
	s1, cleanupS1 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 })
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 })
	defer cleanupS2()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)
	codec := rpcClient(t, s2)

	c, cleanupC := client.TestClient(t, func(c *config.Config) {
		c.Servers = []string{s2.config.RPCAddr.String()}
		c.GCDiskUsageThreshold = 100.0
	})
	defer cleanupC()

	testutil.WaitForResult(func() (bool, error) {
		nodes := s2.connectedNodes()
		return len(nodes) == 1, nil
	}, func(err error) {
		require.Fail("should have a client")
	})

	// Force remove the connection locally in case it exists
	s1.nodeConnsLock.Lock()
	delete(s1.nodeConns, c.NodeID())
	s1.nodeConnsLock.Unlock()

	req := &cstructs.ClientCSIControllerDetachVolumeRequest{
		CSIControllerQuery: cstructs.CSIControllerQuery{ControllerNodeID: c.NodeID()},
	}

	// Fetch the response
	var resp structs.GenericResponse
	err := msgpackrpc.CallWithCodec(codec, "ClientCSI.ControllerDetachVolume", req, &resp)
	require.NotNil(err)

	// Should receive an error from the client endpoint
	require.Contains(err.Error(), "must specify plugin name to dispense")
}