open-consul/consul/client_test.go
James Phillips bc29610124 Adds support for snapshots and restores. (#2396)
* Updates Raft library to get new snapshot/restore API.

* Basic backup and restore working, but need some cleanup.

* Breaks out a snapshot module and adds a SHA256 integrity check.

* Adds snapshot ACL and fills in some missing comments.

* Require a consistent read for snapshots.

* Make sure snapshot works if ACLs aren't enabled.

* Adds a bit of package documentation.

* Returns an empty response from restore to avoid EOF errors.

* Adds API client support for snapshots.

* Makes internal file names match on-disk file snapshots.

* Adds DC and token coverage for snapshot API test.

* Adds missing documentation.

* Adds a unit test for the snapshot client endpoint.

* Moves the connection pool out of the client for easier testing.

* Fixes an incidental issue in the prepared query unit test.

I realized I had two servers in bootstrap mode so this wasn't a good setup.

* Adds a half close to the TCP stream and fixes panic on error.

* Adds client and endpoint tests for snapshots.

* Moves the pool back into the snapshot RPC client.

* Adds a TLS test and fixes half-closes for TLS connections.

* Tweaks some comments.

* Adds a low-level snapshot test.

This is independent of Consul so we can pull this out into a library
later if we want to.

* Cleans up snapshot and archive and completes archive tests.

* Sends a clear error for snapshot operations in dev mode.

Snapshots require the Raft snapshots to be readable, which isn't supported
in dev mode. Send a clear error instead of a deep-down Raft one.

* Adds docs for the snapshot endpoint.

* Adds a stale mode and index feedback for snapshot saves.

This gives folks a way to extract data even if the cluster has no
leader.

* Changes the internal format of a snapshot from zip to tgz.

* Pulls in Raft fix to cancel inflight before a restore.

* Pulls in new Raft restore interface.

* Adds metadata to snapshot saves and a verify function.

* Adds basic save and restore snapshot CLI commands.

* Gets rid of tarball extensions and adds restore message.

* Fixes an incidental bad link in the KV docs.

* Adds documentation for the snapshot CLI commands.

* Scuttle any request body when a snapshot is saved.

* Fixes archive unit test error message check.

* Allows for nil output writers in snapshot RPC handlers.

* Renames hash list Decode to DecodeAndVerify.

* Closes the client connection for snapshot ops.

* Lowers timeout for restore ops.

* Updates Raft vendor to get new Restore signature and integrates with Consul.

* Bounces the leader's internal state when we do a restore.
2016-10-25 19:20:24 -07:00

565 lines
14 KiB
Go

package consul
import (
"bytes"
"fmt"
"net"
"os"
"sync"
"testing"
"time"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/testutil"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/serf/serf"
)
func testClientConfig(t *testing.T, NodeName string) (string, *Config) {
dir := tmpDir(t)
config := DefaultConfig()
config.Datacenter = "dc1"
config.DataDir = dir
config.NodeName = NodeName
config.RPCAddr = &net.TCPAddr{
IP: []byte{127, 0, 0, 1},
Port: getPort(),
}
config.SerfLANConfig.MemberlistConfig.BindAddr = "127.0.0.1"
config.SerfLANConfig.MemberlistConfig.BindPort = getPort()
config.SerfLANConfig.MemberlistConfig.ProbeTimeout = 200 * time.Millisecond
config.SerfLANConfig.MemberlistConfig.ProbeInterval = time.Second
config.SerfLANConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
return dir, config
}
func testClient(t *testing.T) (string, *Client) {
return testClientDC(t, "dc1")
}
func testClientDC(t *testing.T, dc string) (string, *Client) {
dir, config := testClientConfig(t, "testco.internal")
config.Datacenter = dc
client, err := NewClient(config)
if err != nil {
t.Fatalf("err: %v", err)
}
return dir, client
}
func testClientWithConfig(t *testing.T, cb func(c *Config)) (string, *Client) {
name := fmt.Sprintf("Client %d", getPort())
dir, config := testClientConfig(t, name)
cb(config)
client, err := NewClient(config)
if err != nil {
t.Fatalf("err: %v", err)
}
return dir, client
}
func TestClient_StartStop(t *testing.T) {
dir, client := testClient(t)
defer os.RemoveAll(dir)
if err := client.Shutdown(); err != nil {
t.Fatalf("err: %v", err)
}
}
func TestClient_JoinLAN(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClient(t)
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
testutil.WaitForResult(func() (bool, error) {
return c1.servers.NumServers() == 1, nil
}, func(err error) {
t.Fatalf("expected consul server")
})
// Check the members
testutil.WaitForResult(func() (bool, error) {
server_check := len(s1.LANMembers()) == 2
client_check := len(c1.LANMembers()) == 2
return server_check && client_check, nil
}, func(err error) {
t.Fatalf("bad len")
})
// Check we have a new consul
testutil.WaitForResult(func() (bool, error) {
return c1.servers.NumServers() == 1, nil
}, func(err error) {
t.Fatalf("expected consul server")
})
}
func TestClient_JoinLAN_Invalid(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClientDC(t, "other")
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err == nil {
t.Fatalf("should error")
}
time.Sleep(50 * time.Millisecond)
if len(s1.LANMembers()) != 1 {
t.Fatalf("should not join")
}
if len(c1.LANMembers()) != 1 {
t.Fatalf("should not join")
}
}
func TestClient_JoinWAN_Invalid(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClientDC(t, "dc2")
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfWANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err == nil {
t.Fatalf("should error")
}
time.Sleep(50 * time.Millisecond)
if len(s1.WANMembers()) != 1 {
t.Fatalf("should not join")
}
if len(c1.LANMembers()) != 1 {
t.Fatalf("should not join")
}
}
func TestClient_RPC(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClient(t)
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try an RPC
var out struct{}
if err := c1.RPC("Status.Ping", struct{}{}, &out); err != structs.ErrNoServers {
t.Fatalf("err: %v", err)
}
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
// Check the members
if len(s1.LANMembers()) != 2 {
t.Fatalf("bad len")
}
if len(c1.LANMembers()) != 2 {
t.Fatalf("bad len")
}
// RPC should succeed
testutil.WaitForResult(func() (bool, error) {
err := c1.RPC("Status.Ping", struct{}{}, &out)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
}
func TestClient_RPC_Pool(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClient(t)
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try to join.
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
// Wait for both agents to finish joining
testutil.WaitForResult(func() (bool, error) {
return len(s1.LANMembers()) == 2 && len(c1.LANMembers()) == 2, nil
}, func(err error) {
t.Fatalf("Server has %v of %v expected members; Client has %v of %v expected members.",
len(s1.LANMembers()), 2, len(c1.LANMembers()), 2)
})
// Blast out a bunch of RPC requests at the same time to try to get
// contention opening new connections.
var wg sync.WaitGroup
for i := 0; i < 150; i++ {
wg.Add(1)
go func() {
defer wg.Done()
var out struct{}
testutil.WaitForResult(func() (bool, error) {
err := c1.RPC("Status.Ping", struct{}{}, &out)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
}()
}
wg.Wait()
}
func TestClient_RPC_ConsulServerPing(t *testing.T) {
var servers []*Server
var serverDirs []string
const numServers = 5
for n := numServers; n > 0; n-- {
var bootstrap bool
if n == numServers {
bootstrap = true
}
dir, s := testServerDCBootstrap(t, "dc1", bootstrap)
defer os.RemoveAll(dir)
defer s.Shutdown()
servers = append(servers, s)
serverDirs = append(serverDirs, dir)
}
const numClients = 1
clientDir, c := testClient(t)
defer os.RemoveAll(clientDir)
defer c.Shutdown()
// Join all servers.
for _, s := range servers {
addr := fmt.Sprintf("127.0.0.1:%d",
s.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
}
// Sleep to allow Serf to sync, shuffle, and let the shuffle complete
time.Sleep(1 * time.Second)
c.servers.ResetRebalanceTimer()
time.Sleep(1 * time.Second)
if len(c.LANMembers()) != numServers+numClients {
t.Errorf("bad len: %d", len(c.LANMembers()))
}
for _, s := range servers {
if len(s.LANMembers()) != numServers+numClients {
t.Errorf("bad len: %d", len(s.LANMembers()))
}
}
// Ping each server in the list
var pingCount int
for range servers {
time.Sleep(1 * time.Second)
s := c.servers.FindServer()
ok, err := c.connPool.PingConsulServer(s)
if !ok {
t.Errorf("Unable to ping server %v: %s", s.String(), err)
}
pingCount += 1
// Artificially fail the server in order to rotate the server
// list
c.servers.NotifyFailedServer(s)
}
if pingCount != numServers {
t.Errorf("bad len: %d/%d", pingCount, numServers)
}
}
func TestClient_RPC_TLS(t *testing.T) {
dir1, conf1 := testServerConfig(t, "a.testco.internal")
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, conf2 := testClientConfig(t, "b.testco.internal")
conf2.VerifyOutgoing = true
configureTLS(conf2)
c1, err := NewClient(conf2)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Try an RPC
var out struct{}
if err := c1.RPC("Status.Ping", struct{}{}, &out); err != structs.ErrNoServers {
t.Fatalf("err: %v", err)
}
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
// Wait for joins to finish/RPC to succeed
testutil.WaitForResult(func() (bool, error) {
if len(s1.LANMembers()) != 2 {
return false, fmt.Errorf("bad len: %v", len(s1.LANMembers()))
}
if len(c1.LANMembers()) != 2 {
return false, fmt.Errorf("bad len: %v", len(c1.LANMembers()))
}
err := c1.RPC("Status.Ping", struct{}{}, &out)
return err == nil, err
}, func(err error) {
t.Fatalf("err: %v", err)
})
}
func TestClient_SnapshotRPC(t *testing.T) {
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, c1 := testClient(t)
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Wait for the leader
testutil.WaitForLeader(t, s1.RPC, "dc1")
// Try to join.
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
if len(s1.LANMembers()) != 2 || len(c1.LANMembers()) != 2 {
t.Fatalf("Server has %v of %v expected members; Client has %v of %v expected members.", len(s1.LANMembers()), 2, len(c1.LANMembers()), 2)
}
// Wait until we've got a healthy server.
testutil.WaitForResult(func() (bool, error) {
return c1.servers.NumServers() == 1, nil
}, func(err error) {
t.Fatalf("expected consul server")
})
// Take a snapshot.
var snap bytes.Buffer
args := structs.SnapshotRequest{
Datacenter: "dc1",
Op: structs.SnapshotSave,
}
if err := c1.SnapshotRPC(&args, bytes.NewReader([]byte("")), &snap, nil); err != nil {
t.Fatalf("err: %v", err)
}
// Restore a snapshot.
args.Op = structs.SnapshotRestore
if err := c1.SnapshotRPC(&args, &snap, nil, nil); err != nil {
t.Fatalf("err: %v", err)
}
}
func TestClient_SnapshotRPC_TLS(t *testing.T) {
dir1, conf1 := testServerConfig(t, "a.testco.internal")
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, conf2 := testClientConfig(t, "b.testco.internal")
conf2.VerifyOutgoing = true
configureTLS(conf2)
c1, err := NewClient(conf2)
if err != nil {
t.Fatalf("err: %v", err)
}
defer os.RemoveAll(dir2)
defer c1.Shutdown()
// Wait for the leader
testutil.WaitForLeader(t, s1.RPC, "dc1")
// Try to join.
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
if len(s1.LANMembers()) != 2 || len(c1.LANMembers()) != 2 {
t.Fatalf("Server has %v of %v expected members; Client has %v of %v expected members.", len(s1.LANMembers()), 2, len(c1.LANMembers()), 2)
}
// Wait until we've got a healthy server.
testutil.WaitForResult(func() (bool, error) {
return c1.servers.NumServers() == 1, nil
}, func(err error) {
t.Fatalf("expected consul server")
})
// Take a snapshot.
var snap bytes.Buffer
args := structs.SnapshotRequest{
Datacenter: "dc1",
Op: structs.SnapshotSave,
}
if err := c1.SnapshotRPC(&args, bytes.NewReader([]byte("")), &snap, nil); err != nil {
t.Fatalf("err: %v", err)
}
// Restore a snapshot.
args.Op = structs.SnapshotRestore
if err := c1.SnapshotRPC(&args, &snap, nil, nil); err != nil {
t.Fatalf("err: %v", err)
}
}
func TestClientServer_UserEvent(t *testing.T) {
clientOut := make(chan serf.UserEvent, 2)
dir1, c1 := testClientWithConfig(t, func(conf *Config) {
conf.UserEventHandler = func(e serf.UserEvent) {
clientOut <- e
}
})
defer os.RemoveAll(dir1)
defer c1.Shutdown()
serverOut := make(chan serf.UserEvent, 2)
dir2, s1 := testServerWithConfig(t, func(conf *Config) {
conf.UserEventHandler = func(e serf.UserEvent) {
serverOut <- e
}
})
defer os.RemoveAll(dir2)
defer s1.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := c1.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
// Wait for the leader
testutil.WaitForLeader(t, s1.RPC, "dc1")
// Check the members
testutil.WaitForResult(func() (bool, error) {
return len(c1.LANMembers()) == 2 && len(s1.LANMembers()) == 2, nil
}, func(err error) {
t.Fatalf("bad len")
})
// Fire the user event
codec := rpcClient(t, s1)
event := structs.EventFireRequest{
Name: "foo",
Datacenter: "dc1",
Payload: []byte("baz"),
}
if err := msgpackrpc.CallWithCodec(codec, "Internal.EventFire", &event, nil); err != nil {
t.Fatalf("err: %v", err)
}
// Wait for all the events
var clientReceived, serverReceived bool
for i := 0; i < 2; i++ {
select {
case e := <-clientOut:
switch e.Name {
case "foo":
clientReceived = true
default:
t.Fatalf("Bad: %#v", e)
}
case e := <-serverOut:
switch e.Name {
case "foo":
serverReceived = true
default:
t.Fatalf("Bad: %#v", e)
}
case <-time.After(10 * time.Second):
t.Fatalf("timeout")
}
}
if !serverReceived || !clientReceived {
t.Fatalf("missing events")
}
}
func TestClient_Encrypted(t *testing.T) {
dir1, c1 := testClient(t)
defer os.RemoveAll(dir1)
defer c1.Shutdown()
key := []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
dir2, c2 := testClientWithConfig(t, func(c *Config) {
c.SerfLANConfig.MemberlistConfig.SecretKey = key
})
defer os.RemoveAll(dir2)
defer c2.Shutdown()
if c1.Encrypted() {
t.Fatalf("should not be encrypted")
}
if !c2.Encrypted() {
t.Fatalf("should be encrypted")
}
}