open-nomad/nomad/client_agent_endpoint_test.go

1018 lines
22 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package nomad
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"strings"
"testing"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client"
"github.com/hashicorp/nomad/client/config"
sframer "github.com/hashicorp/nomad/client/lib/streamframer"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/command/agent/pprof"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMonitor_Monitor_Remote_Client(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS1()
s2, cleanupS2 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS2()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
})
defer cleanupC()
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
// No node ID to monitor the remote server
req := cstructs.MonitorRequest{
LogLevel: "debug",
NodeID: c.NodeID(),
}
handler, err := s1.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(3 * time.Second)
expected := "[DEBUG]"
received := ""
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout waiting for logs")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error != nil {
t.Fatalf("Got error: %v", msg.Error.Error())
}
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, expected) {
require.Nil(p2.Close())
break OUTER
}
}
}
}
func TestMonitor_Monitor_RemoteServer(t *testing.T) {
ci.Parallel(t)
foreignRegion := "foo"
// start servers
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS1()
s2, cleanupS2 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanupS2()
s3, cleanupS3 := TestServer(t, func(c *Config) {
c.Region = foreignRegion
})
defer cleanupS3()
TestJoin(t, s1, s2, s3)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
testutil.WaitForLeader(t, s3.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
cases := []struct {
desc string
serverID string
expectedLog string
logger hclog.InterceptLogger
origin *Server
region string
expectedErr string
}{
{
desc: "remote leader",
serverID: "leader",
expectedLog: "leader log",
logger: leader.logger,
origin: nonLeader,
region: "global",
},
{
desc: "remote server, server name",
serverID: nonLeader.serf.LocalMember().Name,
expectedLog: "nonleader log",
logger: nonLeader.logger,
origin: leader,
region: "global",
},
{
desc: "remote server, server UUID",
serverID: nonLeader.serf.LocalMember().Tags["id"],
expectedLog: "nonleader log",
logger: nonLeader.logger,
origin: leader,
region: "global",
},
{
desc: "serverID is current leader",
serverID: "leader",
expectedLog: "leader log",
logger: leader.logger,
origin: leader,
region: "global",
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
expectedLog: "non leader log",
logger: nonLeader.logger,
origin: nonLeader,
region: "global",
},
{
desc: "remote server, different region",
serverID: s3.serf.LocalMember().Name,
expectedLog: "remote region logger",
logger: s3.logger,
origin: nonLeader,
region: foreignRegion,
},
{
desc: "different region, region mismatch",
serverID: s3.serf.LocalMember().Name,
expectedLog: "remote region logger",
logger: s3.logger,
origin: nonLeader,
region: "bar",
expectedErr: "No path to region",
},
}
for i := range cases {
tc := cases[i]
t.Run(tc.desc, func(t *testing.T) {
require := require.New(t)
// send some specific logs
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
for {
select {
case <-ctx.Done():
return
default:
tc.logger.Warn(tc.expectedLog)
time.Sleep(10 * time.Millisecond)
}
}
}()
req := cstructs.MonitorRequest{
LogLevel: "warn",
ServerID: tc.serverID,
QueryOptions: structs.QueryOptions{
Region: tc.region,
},
}
handler, err := tc.origin.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(2 * time.Second)
received := ""
OUTER:
for {
select {
case <-timeout:
require.Fail("timeout waiting for logs")
case err := <-errCh:
require.Fail(err.Error())
case msg := <-streamMsg:
if msg.Error != nil {
if tc.expectedErr != "" {
require.Contains(msg.Error.Error(), tc.expectedErr)
break OUTER
} else {
require.Failf("Got error: %v", msg.Error.Error())
}
} else {
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, tc.expectedLog) {
cancel()
require.Nil(p2.Close())
break OUTER
}
}
}
}
})
}
}
func TestMonitor_MonitorServer(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, cleanupS := TestServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
// No node ID to monitor the remote server
req := cstructs.MonitorRequest{
LogLevel: "debug",
QueryOptions: structs.QueryOptions{
Region: "global",
},
}
handler, err := s.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(1 * time.Second)
expected := "[DEBUG]"
received := ""
done := make(chan struct{})
defer close(done)
// send logs
go func() {
for {
select {
case <-time.After(100 * time.Millisecond):
s.logger.Debug("test log")
case <-done:
return
}
}
}()
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout waiting for logs")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error != nil {
t.Fatalf("Got error: %v", msg.Error.Error())
}
var frame sframer.StreamFrame
err := json.Unmarshal(msg.Payload, &frame)
assert.NoError(t, err)
received += string(frame.Data)
if strings.Contains(received, expected) {
require.Nil(p2.Close())
break OUTER
}
}
}
}
func TestMonitor_Monitor_ACL(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyRead)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: structs.ErrPermissionDenied.Error(),
},
{
Name: "good token",
Token: tokenGood.SecretID,
ExpectedErr: "Unknown log level",
},
{
Name: "root token",
Token: root.SecretID,
ExpectedErr: "Unknown log level",
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := &cstructs.MonitorRequest{
LogLevel: "unknown",
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
handler, err := s.StreamingRpcHandler("Agent.Monitor")
require.Nil(err)
// create pipe
p1, p2 := net.Pipe()
defer p1.Close()
defer p2.Close()
errCh := make(chan error)
streamMsg := make(chan *cstructs.StreamErrWrapper)
go handler(p2)
// Start decoder
go func() {
decoder := codec.NewDecoder(p1, structs.MsgpackHandle)
for {
var msg cstructs.StreamErrWrapper
if err := decoder.Decode(&msg); err != nil {
if err == io.EOF || strings.Contains(err.Error(), "closed") {
return
}
errCh <- fmt.Errorf("error decoding: %v", err)
}
streamMsg <- &msg
}
}()
// send request
encoder := codec.NewEncoder(p1, structs.MsgpackHandle)
require.Nil(encoder.Encode(req))
timeout := time.After(5 * time.Second)
OUTER:
for {
select {
case <-timeout:
t.Fatal("timeout")
case err := <-errCh:
t.Fatal(err)
case msg := <-streamMsg:
if msg.Error == nil {
continue
}
if strings.Contains(msg.Error.Error(), tc.ExpectedErr) {
break OUTER
} else {
t.Fatalf("Bad error: %v", msg.Error)
}
}
}
})
}
}
func TestAgentProfile_RemoteClient(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server and client
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
c.EnableDebug = true
})
defer cleanupC()
testutil.WaitForClient(t, s2.RPC, c.NodeID(), c.Region())
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
NodeID: c.NodeID(),
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.NoError(err)
require.NotNil(reply.Payload)
require.Equal(c.NodeID(), reply.AgentID)
}
// Test that we prevent a forwarding loop if the requested
// serverID does not exist in the requested region
func TestAgentProfile_RemoteRegionMisMatch(t *testing.T) {
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "foo"
c.EnableDebug = true
})
defer cleanupS1()
s2, cleanup := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "bar"
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
ServerID: s1.serf.LocalMember().Name,
QueryOptions: structs.QueryOptions{
Region: "bar",
},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.Contains(err.Error(), "unknown Nomad server")
require.Nil(reply.Payload)
}
// Test that Agent.Profile can forward to a different region
func TestAgentProfile_RemoteRegion(t *testing.T) {
require := require.New(t)
// start server and client
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "foo"
})
defer cleanupS1()
s2, cleanup := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
c.Region = "bar"
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
req := structs.AgentPprofRequest{
ReqType: pprof.CPUReq,
ServerID: s2.serf.LocalMember().Name,
QueryOptions: structs.QueryOptions{
Region: "bar",
},
}
reply := structs.AgentPprofResponse{}
err := s1.RPC("Agent.Profile", &req, &reply)
require.NoError(err)
require.NotNil(reply.Payload)
require.Equal(s2.serf.LocalMember().Name, reply.AgentID)
}
func TestAgentProfile_Server(t *testing.T) {
ci.Parallel(t)
// start servers
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
cases := []struct {
desc string
serverID string
origin *Server
expectedErr string
expectedAgentID string
reqType pprof.ReqType
}{
{
desc: "remote leader",
serverID: "leader",
origin: nonLeader,
reqType: pprof.CmdReq,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "remote server",
serverID: nonLeader.serf.LocalMember().Name,
origin: leader,
reqType: pprof.CmdReq,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is current leader",
serverID: "leader",
origin: leader,
reqType: pprof.CmdReq,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
origin: nonLeader,
reqType: pprof.CPUReq,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is unknown",
serverID: uuid.Generate(),
origin: nonLeader,
reqType: pprof.CmdReq,
expectedErr: "unknown Nomad server",
expectedAgentID: "",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
require := require.New(t)
req := structs.AgentPprofRequest{
ReqType: tc.reqType,
ServerID: tc.serverID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.AgentPprofResponse{}
err := tc.origin.RPC("Agent.Profile", &req, &reply)
if tc.expectedErr != "" {
require.Contains(err.Error(), tc.expectedErr)
} else {
require.Nil(err)
require.NotNil(reply.Payload)
}
require.Equal(tc.expectedAgentID, reply.AgentID)
})
}
}
func TestAgentProfile_ACL(t *testing.T) {
ci.Parallel(t)
require := require.New(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyWrite)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: "Permission denied",
},
{
Name: "good token",
Token: tokenGood.SecretID,
},
{
Name: "root token",
Token: root.SecretID,
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := &structs.AgentPprofRequest{
ReqType: pprof.CmdReq,
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
reply := &structs.AgentPprofResponse{}
err := s.RPC("Agent.Profile", req, reply)
if tc.ExpectedErr != "" {
require.Equal(tc.ExpectedErr, err.Error())
} else {
require.NoError(err)
require.NotNil(reply.Payload)
}
})
}
}
func TestAgentHost_Server(t *testing.T) {
ci.Parallel(t)
// start servers
s1, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
s2, cleanup := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.EnableDebug = true
})
defer cleanup()
TestJoin(t, s1, s2)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForLeader(t, s2.RPC)
// determine leader and nonleader
servers := []*Server{s1, s2}
var nonLeader *Server
var leader *Server
for _, s := range servers {
if !s.IsLeader() {
nonLeader = s
} else {
leader = s
}
}
c, cleanupC := client.TestClient(t, func(c *config.Config) {
c.Servers = []string{s2.GetConfig().RPCAddr.String()}
c.EnableDebug = true
})
defer cleanupC()
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil
}, func(err error) {
t.Fatalf("should have a clients")
})
cases := []struct {
desc string
serverID string
nodeID string
origin *Server
expectedErr string
expectedAgentID string
}{
{
desc: "remote leader",
serverID: "leader",
origin: nonLeader,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "remote server",
serverID: nonLeader.serf.LocalMember().Name,
origin: leader,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is current leader",
serverID: "leader",
origin: leader,
expectedAgentID: leader.serf.LocalMember().Name,
},
{
desc: "serverID is current server",
serverID: nonLeader.serf.LocalMember().Name,
origin: nonLeader,
expectedAgentID: nonLeader.serf.LocalMember().Name,
},
{
desc: "serverID is unknown",
serverID: uuid.Generate(),
origin: nonLeader,
expectedErr: "unknown Nomad server",
expectedAgentID: "",
},
{
desc: "local client",
nodeID: c.NodeID(),
origin: s2,
expectedErr: "",
expectedAgentID: c.NodeID(),
},
{
desc: "remote client",
nodeID: c.NodeID(),
origin: s1,
expectedErr: "",
expectedAgentID: c.NodeID(),
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
req := structs.HostDataRequest{
ServerID: tc.serverID,
NodeID: tc.nodeID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
reply := structs.HostDataResponse{}
err := tc.origin.RPC("Agent.Host", &req, &reply)
if tc.expectedErr != "" {
require.Contains(t, err.Error(), tc.expectedErr)
} else {
require.Nil(t, err)
require.NotEmpty(t, reply.HostData)
}
require.Equal(t, tc.expectedAgentID, reply.AgentID)
})
}
}
func TestAgentHost_ACL(t *testing.T) {
ci.Parallel(t)
// start server
s, root, cleanupS := TestACLServer(t, nil)
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)
policyGood := mock.AgentPolicy(acl.PolicyRead)
tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood)
cases := []struct {
Name string
Token string
ExpectedErr string
}{
{
Name: "bad token",
Token: tokenBad.SecretID,
ExpectedErr: "Permission denied",
},
{
Name: "good token",
Token: tokenGood.SecretID,
},
{
Name: "root token",
Token: root.SecretID,
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
req := structs.HostDataRequest{
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
AuthToken: tc.Token,
},
}
var resp structs.HostDataResponse
err := s.RPC("Agent.Host", &req, &resp)
if tc.ExpectedErr != "" {
require.Equal(t, tc.ExpectedErr, err.Error())
} else {
require.NoError(t, err)
require.NotNil(t, resp.HostData)
}
})
}
}
func TestAgentHost_ACLDebugRequired(t *testing.T) {
ci.Parallel(t)
// start server
s, cleanupS := TestServer(t, func(c *Config) {
c.EnableDebug = false
})
defer cleanupS()
testutil.WaitForLeader(t, s.RPC)
req := structs.HostDataRequest{
QueryOptions: structs.QueryOptions{
Namespace: structs.DefaultNamespace,
Region: "global",
},
}
var resp structs.HostDataResponse
err := s.RPC("Agent.Host", &req, &resp)
require.Equal(t, "Permission denied", err.Error())
}