open-nomad/nomad/client_agent_endpoint_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package nomad

import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"strings"
"testing"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client"
"github.com/hashicorp/nomad/client/config"
sframer "github.com/hashicorp/nomad/client/lib/streamframer"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/command/agent/pprof"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
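
// TestMonitor_Monitor_Remote_Client verifies that an Agent.Monitor stream for a
// client node is forwarded through the server cluster to the client agent and
// returns its DEBUG log frames.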
func TestMonitor_Monitor_Remote_Client(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS1()
s2, cleanupS2 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS2()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
})
defer cleanupC()
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
// Monitor the remote client by its node ID
req := cstructs.MonitorRequest{
LogLevel: "debug",
NodeID: c.NodeID(),
}
handler, err := s1.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(3 * time.Second)
expected := "[DEBUG]"
received := ""
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout waiting for logs")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error != nil {
t.Fatalf("Got error: %v", msg.Error.Error())
}
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, expected) {
require.Nil(p2.Close())
break OUTER
}
}
}
}
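
// TestMonitor_Monitor_RemoteServer verifies that Agent.Monitor requests addressed
// by ServerID ("leader", a server name, or a server UUID) are routed to the right
// server, including servers in another region, and that a request for an
// unreachable region returns a "No path to region" error.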
func TestMonitor_Monitor_RemoteServer(t *testing.T) {
ci.Parallel(t)
foreignRegion := "foo"
// start servers
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS1()
s2, cleanupS2 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS2()
s3, cleanupS3 := TestServer(t, func(c *Config) {
c.Region = foreignRegion
})
defer cleanupS3()
TestJoin(t, s1, s2, s3)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
testutil.WaitForLeader(t, s3.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
cases := []struct {
desc string
serverID string
expectedLog string
logger hclog.InterceptLogger
origin *Server
region string
expectedErr string
}{
{
desc: "remote leader",
serverID: "leader",
expectedLog: "leader log",
logger: leader.logger,
origin: nonLeader,
region: "global",
},
{
desc: "remote server, server name",
serverID: nonLeader.serf.LocalMember().Name,
expectedLog: "nonleader log",
logger: nonLeader.logger,
origin: leader,
region: "global",
},
{
desc: "remote server, server UUID",
serverID: nonLeader.serf.LocalMember().Tags["id"],
expectedLog: "nonleader log",
logger: nonLeader.logger,
origin: leader,
region: "global",
},
{
desc: "serverID is current leader",
serverID: "leader",
expectedLog: "leader log",
logger: leader.logger,
origin: leader,
region: "global",
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
expectedLog: "non leader log",
logger: nonLeader.logger,
origin: nonLeader,
region: "global",
},
{
desc: "remote server, different region",
serverID: s3.serf.LocalMember().Name,
expectedLog: "remote region logger",
logger: s3.logger,
origin: nonLeader,
region: foreignRegion,
},
{
desc: "different region, region mismatch",
serverID: s3.serf.LocalMember().Name,
expectedLog: "remote region logger",
logger: s3.logger,
origin: nonLeader,
region: "bar",
expectedErr: "No path to region",
},
}
for i := range cases {
tc := cases[i]
t.Run(tc.desc, func(t *testing.T) {
require := require.New(t)
// send some specific logs
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
for {
select {
case <-ctx.Done():
return
default:
tc.logger.Warn(tc.expectedLog)
time.Sleep(10 * time.Millisecond)
}
}
}()
req := cstructs.MonitorRequest{
LogLevel: "warn",
ServerID: tc.serverID,
QueryOptions: structs.QueryOptions{
Region: tc.region,
},
}
handler, err := tc.origin.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(2 * time.Second)
received := ""
OUTER:
for {
select {
case <-timeout:
require.Fail("timeout waiting for logs")
case err := <-errCh:
require.Fail(err.Error())
case msg := <-streamMsg:
if msg.Error != nil {
if tc.expectedErr != "" {
require.Contains(msg.Error.Error(), tc.expectedErr)
break OUTER
} else {
require.Failf("unexpected error", "Got error: %v", msg.Error.Error())
}
} else {
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, tc.expectedLog) {
cancel()
require.Nil(p2.Close())
break OUTER
}
}
}
}
})
}
}
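
// TestMonitor_MonitorServer verifies that a request without a NodeID or ServerID
// streams the handling server's own logs.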
func TestMonitor_MonitorServer(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, cleanupS := TestServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
// No node ID to monitor the remote server
req := cstructs.MonitorRequest{
LogLevel: "debug",
QueryOptions: structs.QueryOptions{
Region: "global",
},
}
handler, err := s.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(1 * time.Second)
expected := "[DEBUG]"
received := ""
done := make(chan struct{})
defer close(done)
// send logs
go func() {
for {
select {
case <-time.After(100 * time.Millisecond):
s.logger.Debug("test log")
case <-done:
return
}
}
}()
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout waiting for logs")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error != nil {
t.Fatalf("Got error: %v", msg.Error.Error())
}
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, expected) {
require.Nil(p2.Close())
break OUTER
}
}
}
}
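
// TestMonitor_Monitor_ACL verifies that Agent.Monitor rejects tokens without the
// agent read capability, while valid and root tokens pass authorization and then
// fail only on the intentionally invalid log level.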
func TestMonitor_Monitor_ACL(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyRead)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: structs.ErrPermissionDenied.Error(),
},
{
Name: "good token",
Token: tokenGood.SecretID,
ExpectedErr: "Unknown log level",
},
{
Name: "root token",
Token: root.SecretID,
ExpectedErr: "Unknown log level",
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := &cstructs.MonitorRequest{
LogLevel: "unknown",
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
handler, err := s.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(5 * time.Second)
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error == nil {
continue
}
if strings.Contains(msg.Error.Error(), tc.ExpectedErr) {
break OUTER
} else {
t.Fatalf("Bad error: %v", msg.Error)
}
}
}
})
}
}
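
// TestAgentProfile_RemoteClient verifies that a pprof profile request for a client
// node is forwarded to that client agent and returns a payload tagged with the
// client's node ID.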
func TestAgentProfile_RemoteClient(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server and client
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
c.EnableDebug = true
})
defer cleanupC()
testutil.WaitForClient(t, s2.RPC, c.NodeID(), c.Region())
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
NodeID: c.NodeID(),
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.NoError(err)
require.NotNil(reply.Payload)
require.Equal(c.NodeID(), reply.AgentID)
}

// Test that we prevent a forwarding loop if the requested
// serverID does not exist in the requested region
func TestAgentProfile_RemoteRegionMisMatch(t *testing.T) {
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "foo"
c.EnableDebug = true
})
defer cleanupS1()
s2, cleanup := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "bar"
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
ServerID: s1.serf.LocalMember().Name,
QueryOptions: structs.QueryOptions{
Region: "bar",
},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.Contains(err.Error(), "unknown Nomad server")
require.Nil(reply.Payload)
}

// Test that Agent.Profile can forward to a different region
func TestAgentProfile_RemoteRegion(t *testing.T) {
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "foo"
})
defer cleanupS1()
s2, cleanup := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "bar"
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
ServerID: s2.serf.LocalMember().Name,
QueryOptions: structs.QueryOptions{
Region: "bar",
},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.NoError(err)
require.NotNil(reply.Payload)
require.Equal(s2.serf.LocalMember().Name, reply.AgentID)
}
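
// TestAgentProfile_Server exercises Agent.Profile routing between servers: by the
// "leader" alias, by server name, to the local server, and for an unknown server ID.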
func TestAgentProfile_Server(t *testing.T) {
ci.Parallel(t)
// start servers
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
cases := []struct {
desc string
serverID string
origin *Server
expectedErr string
expectedAgentID string
reqType pprof.ReqType
}{
{
desc: "remote leader",
serverID: "leader",
origin: nonLeader,
reqType: pprof.CmdReq,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "remote server",
serverID: nonLeader.serf.LocalMember().Name,
origin: leader,
reqType: pprof.CmdReq,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is current leader",
serverID: "leader",
origin: leader,
reqType: pprof.CmdReq,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
origin: nonLeader,
reqType: pprof.CPUReq,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is unknown",
serverID: uuid.Generate(),
origin: nonLeader,
reqType: pprof.CmdReq,
expectedErr: "unknown Nomad server",
expectedAgentID: "",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
require := require.New(t)
req := structs.AgentPprofRequest{
ReqType: tc.reqType,
ServerID: tc.serverID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.AgentPprofResponse{}
err := tc.origin.RPC("Agent.Profile", &req, &reply)
if tc.expectedErr != "" {
require.Contains(err.Error(), tc.expectedErr)
} else {
require.Nil(err)
require.NotNil(reply.Payload)
}
require.Equal(tc.expectedAgentID, reply.AgentID)
})
}
}
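
// TestAgentProfile_ACL verifies that Agent.Profile requires a token with the agent
// write capability (or the root token) and rejects other tokens.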
func TestAgentProfile_ACL(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyWrite)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: "Permission denied",
},
{
Name: "good token",
Token: tokenGood.SecretID,
},
{
Name: "root token",
Token: root.SecretID,
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := &structs.AgentPprofRequest{
ReqType: pprof.CmdReq,
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
reply := &structs.AgentPprofResponse{}
err := s.RPC("Agent.Profile", req, reply)
if tc.ExpectedErr != "" {
require.Equal(tc.ExpectedErr, err.Error())
} else {
require.NoError(err)
require.NotNil(reply.Payload)
}
})
}
}
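
// TestAgentHost_Server exercises Agent.Host routing to servers (by the "leader"
// alias, by name, or with an unknown ID) and to local and remote client nodes.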
func TestAgentHost_Server(t *testing.T) {
ci.Parallel(t)
// start servers
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
c.EnableDebug = true
})
defer cleanupC()
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
cases := []struct {
desc string
serverID string
nodeID string
origin *Server
expectedErr string
expectedAgentID string
}{
{
desc: "remote leader",
serverID: "leader",
origin: nonLeader,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "remote server",
serverID: nonLeader.serf.LocalMember().Name,
origin: leader,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is current leader",
serverID: "leader",
origin: leader,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
origin: nonLeader,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is unknown",
serverID: uuid.Generate(),
origin: nonLeader,
expectedErr: "unknown Nomad server",
expectedAgentID: "",
},
{
desc: "local client",
nodeID: c.NodeID(),
origin: s2,
expectedErr: "",
expectedAgentID: c.NodeID(),
},
{
desc: "remote client",
nodeID: c.NodeID(),
origin: s1,
expectedErr: "",
expectedAgentID: c.NodeID(),
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
req := structs.HostDataRequest{
ServerID: tc.serverID,
NodeID: tc.nodeID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.HostDataResponse{}
err := tc.origin.RPC("Agent.Host", &req, &reply)
if tc.expectedErr != "" {
require.Contains(t, err.Error(), tc.expectedErr)
} else {
require.Nil(t, err)
require.NotEmpty(t, reply.HostData)
}
require.Equal(t, tc.expectedAgentID, reply.AgentID)
})
}
}
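
// TestAgentHost_ACL verifies that Agent.Host requires a token with the agent read
// capability (or the root token) and rejects other tokens.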
func TestAgentHost_ACL(t *testing.T) {
ci.Parallel(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyRead)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: "Permission denied",
},
{
Name: "good token",
Token: tokenGood.SecretID,
},
{
Name: "root token",
Token: root.SecretID,
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := structs.HostDataRequest{
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
var resp structs.HostDataResponse
err := s.RPC("Agent.Host", &req, &resp)
if tc.ExpectedErr != "" {
require.Equal(t, tc.ExpectedErr, err.Error())
} else {
require.NoError(t, err)
require.NotNil(t, resp.HostData)
}
})
}
}
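
// TestAgentHost_ACLDebugRequired verifies that Agent.Host is denied when ACLs are
// disabled and the agent does not have enable_debug set.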
func TestAgentHost_ACLDebugRequired(t *testing.T) {
ci.Parallel(t)
// start server
s, cleanupS := TestServer(t, func(c *Config) {
c.EnableDebug = false
})
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
req := structs.HostDataRequest{
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
},
}
var resp structs.HostDataResponse
err := s.RPC("Agent.Host", &req, &resp)
require.Equal(t, "Permission denied", err.Error())
}