acbfeb5815
This change updates tests to honor `BootstrapExpect` exclusively when forming test clusters and removes test only knobs, e.g. `config.DevDisableBootstrap`. Background: Test cluster creation is fragile. Test servers don't follow the BootstapExpected route like production clusters. Instead they start as single node clusters and then get rejoin and may risk causing brain split or other test flakiness. The test framework expose few knobs to control those (e.g. `config.DevDisableBootstrap` and `config.Bootstrap`) that control whether a server should bootstrap the cluster. These flags are confusing and it's unclear when to use: their usage in multi-node cluster isn't properly documented. Furthermore, they have some bad side-effects as they don't control Raft library: If `config.DevDisableBootstrap` is true, the test server may not immediately attempt to bootstrap a cluster, but after an election timeout (~50ms), Raft may force a leadership election and win it (with only one vote) and cause a split brain. The knobs are also confusing as Bootstrap is an overloaded term. In BootstrapExpect, we refer to bootstrapping the cluster only after N servers are connected. But in tests and the knobs above, it refers to whether the server is a single node cluster and shouldn't wait for any other server. Changes: This commit makes two changes: First, it relies on `BootstrapExpected` instead of `Bootstrap` and/or `DevMode` flags. This change is relatively trivial. Introduce a `Bootstrapped` flag to track if the cluster is bootstrapped. This allows us to keep `BootstrapExpected` immutable. Previously, the flag was a config value but it gets set to 0 after cluster bootstrap completes.
580 lines
15 KiB
Go
580 lines
15 KiB
Go
package nomad
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/nomad/structs/config"
|
|
"github.com/hashicorp/nomad/testutil"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func tmpDir(t *testing.T) string {
|
|
t.Helper()
|
|
dir, err := ioutil.TempDir("", "nomad")
|
|
if err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
return dir
|
|
}
|
|
|
|
func TestServer_RPC(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
s1, cleanupS1 := TestServer(t, nil)
|
|
defer cleanupS1()
|
|
|
|
var out struct{}
|
|
if err := s1.RPC("Status.Ping", struct{}{}, &out); err != nil {
|
|
t.Fatalf("err: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestServer_RPC_TLS(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node1")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS1()
|
|
|
|
s2, cleanupS2 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node2")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS2()
|
|
|
|
s3, cleanupS3 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node3")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS3()
|
|
|
|
TestJoin(t, s1, s2, s3)
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
// Part of a server joining is making an RPC request, so just by testing
|
|
// that there is a leader we verify that the RPCs are working over TLS.
|
|
}
|
|
|
|
func TestServer_RPC_MixedTLS(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node1")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS1()
|
|
|
|
s2, cleanupS2 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node2")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS2()
|
|
|
|
s3, cleanupS3 := TestServer(t, func(c *Config) {
|
|
c.Region = "regionFoo"
|
|
c.BootstrapExpect = 3
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node3")
|
|
})
|
|
defer cleanupS3()
|
|
|
|
TestJoin(t, s1, s2, s3)
|
|
|
|
// Ensure that we do not form a quorum
|
|
start := time.Now()
|
|
for {
|
|
if time.Now().After(start.Add(2 * time.Second)) {
|
|
break
|
|
}
|
|
|
|
args := &structs.GenericRequest{}
|
|
var leader string
|
|
err := s1.RPC("Status.Leader", args, &leader)
|
|
if err == nil || leader != "" {
|
|
t.Fatalf("Got leader or no error: %q %v", leader, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestServer_Regions(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Make the servers
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.Region = "region1"
|
|
})
|
|
defer cleanupS1()
|
|
|
|
s2, cleanupS2 := TestServer(t, func(c *Config) {
|
|
c.Region = "region2"
|
|
})
|
|
defer cleanupS2()
|
|
|
|
// Join them together
|
|
s2Addr := fmt.Sprintf("127.0.0.1:%d",
|
|
s2.config.SerfConfig.MemberlistConfig.BindPort)
|
|
if n, err := s1.Join([]string{s2Addr}); err != nil || n != 1 {
|
|
t.Fatalf("Failed joining: %v (%d joined)", err, n)
|
|
}
|
|
|
|
// Try listing the regions
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
out := s1.Regions()
|
|
if len(out) != 2 || out[0] != "region1" || out[1] != "region2" {
|
|
return false, fmt.Errorf("unexpected regions: %v", out)
|
|
}
|
|
return true, nil
|
|
}, func(err error) {
|
|
t.Fatalf("err: %v", err)
|
|
})
|
|
}
|
|
|
|
func TestServer_Reload_Vault(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.Region = "global"
|
|
})
|
|
defer cleanupS1()
|
|
|
|
if s1.vault.Running() {
|
|
t.Fatalf("Vault client should not be running")
|
|
}
|
|
|
|
tr := true
|
|
config := DefaultConfig()
|
|
config.VaultConfig.Enabled = &tr
|
|
config.VaultConfig.Token = uuid.Generate()
|
|
|
|
if err := s1.Reload(config); err != nil {
|
|
t.Fatalf("Reload failed: %v", err)
|
|
}
|
|
|
|
if !s1.vault.Running() {
|
|
t.Fatalf("Vault client should be running")
|
|
}
|
|
}
|
|
|
|
func connectionReset(msg string) bool {
|
|
return strings.Contains(msg, "EOF") || strings.Contains(msg, "connection reset by peer")
|
|
}
|
|
|
|
// Tests that the server will successfully reload its network connections,
|
|
// upgrading from plaintext to TLS if the server's TLS configuration changes.
|
|
func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) {
|
|
t.Parallel()
|
|
assert := assert.New(t)
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.DataDir = path.Join(dir, "nodeA")
|
|
})
|
|
defer cleanupS1()
|
|
|
|
// assert that the server started in plaintext mode
|
|
assert.Equal(s1.config.TLSConfig.CertFile, "")
|
|
|
|
newTLSConfig := &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
|
|
err := s1.reloadTLSConnections(newTLSConfig)
|
|
assert.Nil(err)
|
|
assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))
|
|
|
|
codec := rpcClient(t, s1)
|
|
|
|
node := mock.Node()
|
|
req := &structs.NodeRegisterRequest{
|
|
Node: node,
|
|
WriteRequest: structs.WriteRequest{Region: "global"},
|
|
}
|
|
|
|
var resp structs.GenericResponse
|
|
err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
|
|
assert.NotNil(err)
|
|
assert.True(connectionReset(err.Error()))
|
|
}
|
|
|
|
// Tests that the server will successfully reload its network connections,
|
|
// downgrading from TLS to plaintext if the server's TLS configuration changes.
|
|
func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) {
|
|
t.Parallel()
|
|
assert := assert.New(t)
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.DataDir = path.Join(dir, "nodeB")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS1()
|
|
|
|
newTLSConfig := &config.TLSConfig{}
|
|
|
|
err := s1.reloadTLSConnections(newTLSConfig)
|
|
assert.Nil(err)
|
|
assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))
|
|
|
|
codec := rpcClient(t, s1)
|
|
|
|
node := mock.Node()
|
|
req := &structs.NodeRegisterRequest{
|
|
Node: node,
|
|
WriteRequest: structs.WriteRequest{Region: "global"},
|
|
}
|
|
|
|
var resp structs.GenericResponse
|
|
err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
|
|
assert.Nil(err)
|
|
}
|
|
|
|
// Tests that the server will successfully reload its network connections,
|
|
// downgrading only RPC connections
|
|
func TestServer_Reload_TLSConnections_TLSToPlaintext_OnlyRPC(t *testing.T) {
|
|
t.Parallel()
|
|
assert := assert.New(t)
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.DataDir = path.Join(dir, "nodeB")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS1()
|
|
|
|
newTLSConfig := &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: false,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
|
|
err := s1.reloadTLSConnections(newTLSConfig)
|
|
assert.Nil(err)
|
|
assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))
|
|
|
|
codec := rpcClient(t, s1)
|
|
|
|
node := mock.Node()
|
|
req := &structs.NodeRegisterRequest{
|
|
Node: node,
|
|
WriteRequest: structs.WriteRequest{Region: "global"},
|
|
}
|
|
|
|
var resp structs.GenericResponse
|
|
err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
|
|
assert.Nil(err)
|
|
}
|
|
|
|
// Tests that the server will successfully reload its network connections,
|
|
// upgrading only RPC connections
|
|
func TestServer_Reload_TLSConnections_PlaintextToTLS_OnlyRPC(t *testing.T) {
|
|
t.Parallel()
|
|
assert := assert.New(t)
|
|
|
|
const (
|
|
cafile = "../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
)
|
|
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.DataDir = path.Join(dir, "nodeB")
|
|
c.TLSConfig = &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: false,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
})
|
|
defer cleanupS1()
|
|
|
|
newTLSConfig := &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
EnableRPC: true,
|
|
VerifyServerHostname: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
|
|
err := s1.reloadTLSConnections(newTLSConfig)
|
|
assert.Nil(err)
|
|
assert.True(s1.config.TLSConfig.EnableRPC)
|
|
assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))
|
|
|
|
codec := rpcClient(t, s1)
|
|
|
|
node := mock.Node()
|
|
req := &structs.NodeRegisterRequest{
|
|
Node: node,
|
|
WriteRequest: structs.WriteRequest{Region: "global"},
|
|
}
|
|
|
|
var resp structs.GenericResponse
|
|
err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
|
|
assert.NotNil(err)
|
|
assert.True(connectionReset(err.Error()))
|
|
}
|
|
|
|
// Test that Raft connections are reloaded as expected when a Nomad server is
|
|
// upgraded from plaintext to TLS
|
|
func TestServer_Reload_TLSConnections_Raft(t *testing.T) {
|
|
t.Parallel()
|
|
assert := assert.New(t)
|
|
|
|
const (
|
|
cafile = "../../helper/tlsutil/testdata/ca.pem"
|
|
foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
|
|
fookey = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
|
|
barcert = "../dev/tls_cluster/certs/nomad.pem"
|
|
barkey = "../dev/tls_cluster/certs/nomad-key.pem"
|
|
)
|
|
dir := tmpDir(t)
|
|
defer os.RemoveAll(dir)
|
|
|
|
s1, cleanupS1 := TestServer(t, func(c *Config) {
|
|
c.BootstrapExpect = 2
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node1")
|
|
c.NodeName = "node1"
|
|
c.Region = "regionFoo"
|
|
})
|
|
defer cleanupS1()
|
|
|
|
s2, cleanupS2 := TestServer(t, func(c *Config) {
|
|
c.BootstrapExpect = 2
|
|
c.DevMode = false
|
|
c.DataDir = path.Join(dir, "node2")
|
|
c.NodeName = "node2"
|
|
c.Region = "regionFoo"
|
|
})
|
|
defer cleanupS2()
|
|
|
|
TestJoin(t, s1, s2)
|
|
servers := []*Server{s1, s2}
|
|
|
|
testutil.WaitForLeader(t, s1.RPC)
|
|
|
|
newTLSConfig := &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
VerifyHTTPSClient: true,
|
|
CAFile: cafile,
|
|
CertFile: foocert,
|
|
KeyFile: fookey,
|
|
}
|
|
|
|
err := s1.reloadTLSConnections(newTLSConfig)
|
|
assert.Nil(err)
|
|
|
|
{
|
|
for _, serv := range servers {
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
args := &structs.GenericRequest{}
|
|
var leader string
|
|
err := serv.RPC("Status.Leader", args, &leader)
|
|
if leader != "" && err != nil {
|
|
return false, fmt.Errorf("Should not have found leader but got %s", leader)
|
|
}
|
|
return true, nil
|
|
}, func(err error) {
|
|
t.Fatalf("err: %v", err)
|
|
})
|
|
}
|
|
}
|
|
|
|
secondNewTLSConfig := &config.TLSConfig{
|
|
EnableHTTP: true,
|
|
VerifyHTTPSClient: true,
|
|
CAFile: cafile,
|
|
CertFile: barcert,
|
|
KeyFile: barkey,
|
|
}
|
|
|
|
// Now, transition the other server to TLS, which should restore their
|
|
// ability to communicate.
|
|
err = s2.reloadTLSConnections(secondNewTLSConfig)
|
|
assert.Nil(err)
|
|
|
|
testutil.WaitForLeader(t, s2.RPC)
|
|
}
|
|
|
|
func TestServer_InvalidSchedulers(t *testing.T) {
|
|
t.Parallel()
|
|
require := require.New(t)
|
|
|
|
// Set the config to not have the core scheduler
|
|
config := DefaultConfig()
|
|
logger := testlog.HCLogger(t)
|
|
s := &Server{
|
|
config: config,
|
|
logger: logger,
|
|
}
|
|
|
|
config.EnabledSchedulers = []string{"batch"}
|
|
err := s.setupWorkers()
|
|
require.NotNil(err)
|
|
require.Contains(err.Error(), "scheduler not enabled")
|
|
|
|
// Set the config to have an unknown scheduler
|
|
config.EnabledSchedulers = []string{"batch", structs.JobTypeCore, "foo"}
|
|
err = s.setupWorkers()
|
|
require.NotNil(err)
|
|
require.Contains(err.Error(), "foo")
|
|
}
|
|
|
|
func TestServer_RPCNameAndRegionValidation(t *testing.T) {
|
|
t.Parallel()
|
|
for _, tc := range []struct {
|
|
name string
|
|
region string
|
|
expected bool
|
|
}{
|
|
// OK
|
|
{name: "client.global.nomad", region: "global", expected: true},
|
|
{name: "server.global.nomad", region: "global", expected: true},
|
|
{name: "server.other.nomad", region: "global", expected: true},
|
|
{name: "server.other.region.nomad", region: "other.region", expected: true},
|
|
|
|
// Bad
|
|
{name: "client.other.nomad", region: "global", expected: false},
|
|
{name: "client.global.nomad.other", region: "global", expected: false},
|
|
{name: "server.global.nomad.other", region: "global", expected: false},
|
|
{name: "other.global.nomad", region: "global", expected: false},
|
|
{name: "server.nomad", region: "global", expected: false},
|
|
{name: "localhost", region: "global", expected: false},
|
|
} {
|
|
assert.Equal(t, tc.expected, validateRPCRegionPeer(tc.name, tc.region),
|
|
"expected %q in region %q to validate as %v",
|
|
tc.name, tc.region, tc.expected)
|
|
}
|
|
}
|