open-nomad/command/operator_debug_test.go
Seth Hoenig 9eb2433871
command: fixup parsing of stale query parameter (#15631)
In #15605 we fixed the bug where the presense of "stale" query parameter
was mean to imply stale, even if the value of the parameter was "false"
or malformed. In parsing, we missed the case where the slice of values
would be nil which lead to a failing test case that was missed because
CI didn't run against the original PR.
2023-01-03 08:21:20 -06:00

1071 lines
31 KiB
Go

package command
import (
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"regexp"
"strings"
"testing"
"time"
consulapi "github.com/hashicorp/consul/api"
consultest "github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
clienttest "github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/testutil"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// NOTE: most of these tests cannot be run in parallel
type testCase struct {
name string
args []string
expectedCode int
expectedOutputs []string
expectedError string
}
type testCases []testCase
func runTestCases(t *testing.T, cases testCases) {
t.Helper()
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
code := cmd.Run(c.args)
out := ui.OutputWriter.String()
outerr := ui.ErrorWriter.String()
assert.Equalf(t, c.expectedCode, code, "did not get expected exit code")
if len(c.expectedOutputs) > 0 {
if assert.NotEmpty(t, out, "command output was empty") {
for _, expectedOutput := range c.expectedOutputs {
assert.Contains(t, out, expectedOutput, "did not get expected output")
}
}
} else {
assert.Empty(t, out, "command output should have been empty")
}
if c.expectedError == "" {
assert.Empty(t, outerr, "got unexpected error")
} else {
assert.Containsf(t, outerr, c.expectedError, "did not get expected error")
}
})
}
}
func newClientAgentConfigFunc(region string, nodeClass string, srvRPCAddr string) func(*agent.Config) {
if region == "" {
region = "global"
}
return func(c *agent.Config) {
c.Region = region
c.Client.NodeClass = nodeClass
c.Client.Servers = []string{srvRPCAddr}
c.Client.Enabled = true
c.Server.Enabled = false
}
}
func TestDebug_NodeClass(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
// Retrieve server RPC address to join clients
srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
t.Logf("Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
// Start test clients
testClient(t, "client1", newClientAgentConfigFunc("global", "classA", srvRPCAddr))
testClient(t, "client2", newClientAgentConfigFunc("global", "classB", srvRPCAddr))
testClient(t, "client3", newClientAgentConfigFunc("global", "classA", srvRPCAddr))
// Setup test cases
cases := testCases{
{
name: "address=api, node-class=classA, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classA", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (2/3)",
"Max node count reached (2)",
"Node Class: classA",
"Created debug archive",
},
expectedError: "",
},
{
name: "address=api, node-class=classB, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classB", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (1/3)",
"Node Class: classB",
"Created debug archive",
},
expectedError: "",
},
}
runTestCases(t, cases)
}
func TestDebug_ClientToServer(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
// Retrieve server RPC address to join client
srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
t.Logf("Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
// Start client
agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc("", "", srvRPCAddr))
// Get API addresses
addrServer := srv.HTTPAddr()
addrClient1 := agent1.HTTPAddr()
t.Logf("testAgent api address: %s", url)
t.Logf("Server api address: %s", addrServer)
t.Logf("Client1 api address: %s", addrClient1)
// Setup test cases
var cases = testCases{
{
name: "testAgent api server",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
{
name: "server address",
args: []string{"-address", addrServer, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
{
name: "client1 address - verify no SIGSEGV panic",
args: []string{"-address", addrClient1, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
},
}
runTestCases(t, cases)
}
func TestDebug_MultiRegion(t *testing.T) {
region1 := "region1"
region2 := "region2"
// Start region1 server
server1, _, addrServer1 := testServer(t, false, func(c *agent.Config) { c.Region = region1 })
testutil.WaitForLeader(t, server1.Agent.RPC)
rpcAddrServer1 := server1.GetConfig().AdvertiseAddrs.RPC
t.Logf("%s: Leader started, HTTPAddr: %s, RPC: %s", region1, addrServer1, rpcAddrServer1)
// Start region1 client
agent1, _, addrClient1 := testClient(t, "client1", newClientAgentConfigFunc(region1, "", rpcAddrServer1))
nodeIdClient1 := agent1.Agent.Client().NodeID()
t.Logf("%s: Client1 started, ID: %s, HTTPAddr: %s", region1, nodeIdClient1, addrClient1)
// Start region2 server
server2, _, addrServer2 := testServer(t, false, func(c *agent.Config) { c.Region = region2 })
testutil.WaitForLeader(t, server2.Agent.RPC)
rpcAddrServer2 := server2.GetConfig().AdvertiseAddrs.RPC
t.Logf("%s: Leader started, HTTPAddr: %s, RPC: %s", region2, addrServer2, rpcAddrServer2)
// Start client2
agent2, _, addrClient2 := testClient(t, "client2", newClientAgentConfigFunc(region2, "", rpcAddrServer2))
nodeIdClient2 := agent2.Agent.Client().NodeID()
t.Logf("%s: Client1 started, ID: %s, HTTPAddr: %s", region2, nodeIdClient2, addrClient2)
t.Logf("Region: %s, Server1 api address: %s", region1, addrServer1)
t.Logf("Region: %s, Client1 api address: %s", region1, addrClient1)
t.Logf("Region: %s, Server2 api address: %s", region2, addrServer2)
t.Logf("Region: %s, Client2 api address: %s", region2, addrClient2)
// Setup test cases
var cases = testCases{
// Good
{
name: "no region - all servers, all clients",
args: []string{"-address", addrServer1, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{"Starting debugger"},
},
{
name: "region1 - server1 address",
args: []string{"-address", addrServer1, "-region", region1, "-duration", "50ms", "-interval", "50ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region1 + "\n",
"Servers: (1/1) [TestDebug_MultiRegion.region1]",
"Clients: (1/1) [" + nodeIdClient1 + "]",
"Created debug archive",
},
},
{
name: "region1 - client1 address",
args: []string{"-address", addrClient1, "-region", region1, "-duration", "50ms", "-interval", "50ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region1 + "\n",
"Servers: (1/1) [TestDebug_MultiRegion.region1]",
"Clients: (1/1) [" + nodeIdClient1 + "]",
"Created debug archive",
},
},
{
name: "region2 - server2 address",
args: []string{"-address", addrServer2, "-region", region2, "-duration", "50ms", "-interval", "50ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region2 + "\n",
"Servers: (1/1) [TestDebug_MultiRegion.region2]",
"Clients: (1/1) [" + nodeIdClient2 + "]",
"Created debug archive",
},
},
{
name: "region2 - client2 address",
args: []string{"-address", addrClient2, "-region", region2, "-duration", "50ms", "-interval", "50ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region2 + "\n",
"Servers: (1/1) [TestDebug_MultiRegion.region2]",
"Clients: (1/1) [" + nodeIdClient2 + "]",
"Created debug archive",
},
},
// Bad
{
name: "invalid region - all servers, all clients",
args: []string{"-address", addrServer1, "-region", "never", "-duration", "50ms", "-interval", "50ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 1,
expectedError: "500 (No path to region)",
},
}
runTestCases(t, cases)
}
func TestDebug_SingleServer(t *testing.T) {
srv, _, url := testServer(t, false, nil)
testutil.WaitForLeader(t, srv.Agent.RPC)
var cases = testCases{
{
name: "address=api, server-id=leader",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "leader"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (0/0)",
"Created debug archive",
},
expectedError: "No node(s) with prefix",
},
{
name: "address=api, server-id=all",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (0/0)",
"Created debug archive",
},
expectedError: "No node(s) with prefix",
},
}
runTestCases(t, cases)
}
func TestDebug_Failures(t *testing.T) {
srv, _, url := testServer(t, false, nil)
testutil.WaitForLeader(t, srv.Agent.RPC)
var cases = testCases{
{
name: "fails incorrect args",
args: []string{"some", "bad", "args"},
expectedCode: 1,
expectedError: "This command takes no arguments",
},
{
name: "Fails illegal node ids",
args: []string{"-node-id", "foo:bar"},
expectedCode: 1,
expectedError: "Error querying node info",
},
{
name: "Fails missing node ids",
args: []string{"-node-id", "abc,def", "-duration", "250ms", "-interval", "250ms"},
expectedCode: 1,
expectedError: "Error querying node info",
},
{
name: "Fails bad durations",
args: []string{"-duration", "foo"},
expectedCode: 1,
expectedError: "Error parsing duration: foo: time: invalid duration \"foo\""},
{
name: "Fails bad intervals",
args: []string{"-interval", "bar"},
expectedCode: 1,
expectedError: "Error parsing interval: bar: time: invalid duration \"bar\"",
},
{
name: "Fails intervals greater than duration",
args: []string{"-duration", "5m", "-interval", "10m"},
expectedCode: 1,
expectedError: "Error parsing interval: 10m is greater than duration 5m",
},
{
name: "Fails bad pprof duration",
args: []string{"-pprof-duration", "baz"},
expectedCode: 1,
expectedError: "Error parsing pprof duration: baz: time: invalid duration \"baz\"",
},
{
name: "Fails bad pprof interval",
args: []string{"-pprof-interval", "bar"},
expectedCode: 1,
expectedError: "Error parsing pprof-interval: bar: time: invalid duration \"bar\"",
},
{
name: "Fails bad address",
args: []string{"-address", url + "bogus"},
expectedCode: 1,
expectedError: "invalid address",
},
}
runTestCases(t, cases)
}
func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
cases := []string{
"aws/ebs",
"gcp-*-1",
}
for _, pluginName := range cases {
cleanup := state.CreateTestCSIPlugin(srv.Agent.Server().State(), pluginName)
defer cleanup()
}
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
testDir := t.TempDir()
defer os.Remove(testDir)
// Debug on the leader and all client nodes
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "leader", "-node-id", "all", "-output", testDir})
assert.Equal(t, 0, code)
// Bad plugin name should be escaped before it reaches the sandbox test
require.NotContains(t, ui.ErrorWriter.String(), "file path escapes capture directory")
require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
path := cmd.collectDir
var pluginFiles []string
for _, pluginName := range cases {
pluginFile := fmt.Sprintf("csi-plugin-id-%s.json", helper.CleanFilename(pluginName, "_"))
pluginFile = filepath.Join(path, intervalDir, "0000", pluginFile)
pluginFiles = append(pluginFiles, pluginFile)
}
testutil.WaitForFiles(t, pluginFiles)
}
func buildPathSlice(path string, files []string) []string {
paths := []string{}
for _, file := range files {
paths = append(paths, filepath.Join(path, file))
}
return paths
}
func TestDebug_CapturedFiles(t *testing.T) {
srv, _, url := testServer(t, true, nil)
testutil.WaitForLeader(t, srv.Agent.RPC)
serverNodeName := srv.Config.NodeName
region := srv.Config.Region
serverName := fmt.Sprintf("%s.%s", serverNodeName, region)
clientID := srv.Agent.Client().NodeID()
testutil.WaitForClient(t, srv.Agent.Client().RPC, clientID, srv.Agent.Client().Region())
t.Logf("serverName: %s, clientID, %s", serverName, clientID)
// Setup file slices
clusterFiles := []string{
"agent-self.json",
"members.json",
"namespaces.json",
"regions.json",
}
pprofFiles := []string{
"allocs.prof",
"goroutine-debug1.txt",
"goroutine-debug2.txt",
"goroutine.prof",
"heap.prof",
"profile_0000.prof",
"threadcreate.prof",
"trace.prof",
}
clientFiles := []string{
"agent-host.json",
"monitor.log",
}
clientFiles = append(clientFiles, pprofFiles...)
serverFiles := []string{
"agent-host.json",
"monitor.log",
}
serverFiles = append(serverFiles, pprofFiles...)
intervalFiles := []string{
"allocations.json",
"csi-plugins.json",
"csi-volumes.json",
"deployments.json",
"evaluations.json",
"jobs.json",
"license.json",
"metrics.json",
"nodes.json",
"operator-autopilot-health.json",
"operator-raft.json",
"operator-scheduler.json",
}
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
testDir := t.TempDir()
defer os.Remove(testDir)
duration := 2 * time.Second
interval := 750 * time.Millisecond
waitTime := 2 * duration
code := cmd.Run([]string{
"-address", url,
"-output", testDir,
"-server-id", serverName,
"-node-id", clientID,
"-duration", duration.String(),
"-interval", interval.String(),
})
// There should be no errors
require.Empty(t, ui.ErrorWriter.String())
require.Equal(t, 0, code)
ui.ErrorWriter.Reset()
// Verify cluster files
clusterPaths := buildPathSlice(cmd.path(clusterDir), clusterFiles)
t.Logf("Waiting for cluster files in path: %s", clusterDir)
testutil.WaitForFilesUntil(t, clusterPaths, waitTime)
// Verify client files
clientPaths := buildPathSlice(cmd.path(clientDir, clientID), clientFiles)
t.Logf("Waiting for client files in path: %s", clientDir)
testutil.WaitForFilesUntil(t, clientPaths, waitTime)
// Verify server files
serverPaths := buildPathSlice(cmd.path(serverDir, serverName), serverFiles)
t.Logf("Waiting for server files in path: %s", serverDir)
testutil.WaitForFilesUntil(t, serverPaths, waitTime)
// Verify interval 0000 files
intervalPaths0 := buildPathSlice(cmd.path(intervalDir, "0000"), intervalFiles)
t.Logf("Waiting for interval 0000 files in path: %s", intervalDir)
testutil.WaitForFilesUntil(t, intervalPaths0, waitTime)
// Verify interval 0001 files
intervalPaths1 := buildPathSlice(cmd.path(intervalDir, "0001"), intervalFiles)
t.Logf("Waiting for interval 0001 files in path: %s", intervalDir)
testutil.WaitForFilesUntil(t, intervalPaths1, waitTime)
}
func TestDebug_ExistingOutput(t *testing.T) {
ci.Parallel(t)
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Fails existing output
format := "2006-01-02-150405Z"
stamped := "nomad-debug-" + time.Now().UTC().Format(format)
tempDir := t.TempDir()
path := filepath.Join(tempDir, stamped)
os.MkdirAll(path, 0755)
defer os.Remove(tempDir)
code := cmd.Run([]string{"-output", tempDir, "-duration", "50ms", "-interval", "50ms"})
require.Equal(t, 2, code)
}
func TestDebug_Fail_Pprof(t *testing.T) {
// Setup agent config with debug endpoints disabled
agentConfFunc := func(c *agent.Config) {
c.EnableDebug = false
}
// Start test server and API client
srv, _, url := testServer(t, false, agentConfFunc)
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Debug on server with endpoints disabled
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"})
assert.Equal(t, 0, code) // Pprof failure isn't fatal
require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
require.Contains(t, ui.ErrorWriter.String(), "Failed to retrieve pprof") // Should report pprof failure
require.Contains(t, ui.ErrorWriter.String(), "Permission denied") // Specifically permission denied
require.Contains(t, ui.OutputWriter.String(), "Created debug archive") // Archive should be generated anyway
}
// TestDebug_PprofVersionCheck asserts that only versions < 0.12.0 are
// filtered by the version constraint.
func TestDebug_PprofVersionCheck(t *testing.T) {
cases := []struct {
version string
errMsg string
}{
{"0.8.7", ""},
{"0.11.1", "unsupported version=0.11.1 matches version filter >= 0.11.0, <= 0.11.2"},
{"0.11.2", "unsupported version=0.11.2 matches version filter >= 0.11.0, <= 0.11.2"},
{"0.11.2+ent", "unsupported version=0.11.2+ent matches version filter >= 0.11.0, <= 0.11.2"},
{"0.11.3", ""},
{"0.11.3+ent", ""},
{"0.12.0", ""},
{"1.3.0", ""},
{"foo.bar", "error: Malformed version: foo.bar"},
}
for _, tc := range cases {
t.Run(tc.version, func(t *testing.T) {
err := checkVersion(tc.version, minimumVersionPprofConstraint)
if tc.errMsg == "" {
require.NoError(t, err, "expected no error from %s", tc.version)
} else {
require.EqualError(t, err, tc.errMsg)
}
})
}
}
func TestDebug_StringToSlice(t *testing.T) {
ci.Parallel(t)
cases := []struct {
input string
expected []string
}{
{input: ",,", expected: []string(nil)},
{input: "", expected: []string(nil)},
{input: "foo, bar", expected: []string{"foo", "bar"}},
{input: " foo, bar ", expected: []string{"foo", "bar"}},
{input: "foo,,bar", expected: []string{"foo", "bar"}},
}
for _, tc := range cases {
out := stringToSlice(tc.input)
require.Equal(t, tc.expected, out)
}
}
func TestDebug_External(t *testing.T) {
ci.Parallel(t)
// address calculation honors CONSUL_HTTP_SSL
// ssl: true - Correct alignment
e := &external{addrVal: "https://127.0.0.1:8500", ssl: true}
addr := e.addr("foo")
require.Equal(t, "https://127.0.0.1:8500", addr)
// ssl: true - protocol incorrect
// NOTE: Address with protocol now overrides ssl flag
e = &external{addrVal: "http://127.0.0.1:8500", ssl: true}
addr = e.addr("foo")
require.Equal(t, "http://127.0.0.1:8500", addr)
// ssl: true - protocol missing
e = &external{addrVal: "127.0.0.1:8500", ssl: true}
addr = e.addr("foo")
require.Equal(t, "https://127.0.0.1:8500", addr)
// ssl: false - correct alignment
e = &external{addrVal: "http://127.0.0.1:8500", ssl: false}
addr = e.addr("foo")
require.Equal(t, "http://127.0.0.1:8500", addr)
// ssl: false - protocol incorrect
// NOTE: Address with protocol now overrides ssl flag
e = &external{addrVal: "https://127.0.0.1:8500", ssl: false}
addr = e.addr("foo")
require.Equal(t, "https://127.0.0.1:8500", addr)
// ssl: false - protocol missing
e = &external{addrVal: "127.0.0.1:8500", ssl: false}
addr = e.addr("foo")
require.Equal(t, "http://127.0.0.1:8500", addr)
// Address through proxy might not have a port
e = &external{addrVal: "https://127.0.0.1", ssl: true}
addr = e.addr("foo")
require.Equal(t, "https://127.0.0.1", addr)
}
func TestDebug_WriteBytes_Nil(t *testing.T) {
ci.Parallel(t)
var testDir, testFile, testPath string
var testBytes []byte
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
testDir = t.TempDir()
defer os.Remove(testDir)
cmd.collectDir = testDir
testFile = "test_nil.json"
testPath = filepath.Join(testDir, testFile)
// Write nil file at top level of collect directory
err := cmd.writeBytes("", testFile, testBytes)
require.NoError(t, err)
require.FileExists(t, testPath)
}
func TestDebug_WriteBytes_PathEscapesSandbox(t *testing.T) {
ci.Parallel(t)
var testDir, testFile string
var testBytes []byte
testDir = t.TempDir()
defer os.Remove(testDir)
testFile = "testing.json"
testPath := filepath.Join(testDir, testFile)
defer os.Remove(testPath)
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Empty collectDir will always appear to be escaped
cmd.collectDir = ""
err := cmd.writeBytes(testDir, testFile, testBytes)
require.Error(t, err)
}
func TestDebug_CollectConsul(t *testing.T) {
ci.Parallel(t)
if testing.Short() {
t.Skip("-short set; skipping")
}
// Skip test if Consul binary cannot be found
clienttest.RequireConsul(t)
// Create an embedded Consul server
testconsul, err := consultest.NewTestServerConfigT(t, func(c *consultest.TestServerConfig) {
c.Peering = nil // fix for older versions of Consul (<1.13.0) that don't support peering
// If -v wasn't specified squelch consul logging
if !testing.Verbose() {
c.Stdout = ioutil.Discard
c.Stderr = ioutil.Discard
}
})
require.NoError(t, err)
if err != nil {
t.Fatalf("error starting test consul server: %v", err)
}
defer testconsul.Stop()
consulConfig := consulapi.DefaultConfig()
consulConfig.Address = testconsul.HTTPAddr
// Setup mock UI
ui := cli.NewMockUi()
c := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Setup Consul *external
ce := &external{}
ce.setAddr(consulConfig.Address)
if ce.ssl {
ce.tls = &api.TLSConfig{}
}
// Set global client
c.consul = ce
// Setup capture directory
testDir := t.TempDir()
defer os.Remove(testDir)
c.collectDir = testDir
// Collect data from Consul into folder "test"
c.collectConsul("test")
require.Empty(t, ui.ErrorWriter.String())
require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-host.json"))
require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-members.json"))
require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-metrics.json"))
require.FileExists(t, filepath.Join(testDir, "test", "consul-leader.json"))
}
func TestDebug_CollectVault(t *testing.T) {
ci.Parallel(t)
if testing.Short() {
t.Skip("-short set; skipping")
}
// Skip test if Consul binary cannot be found
clienttest.RequireVault(t)
// Create a Vault server
v := testutil.NewTestVault(t)
defer v.Stop()
// Setup mock UI
ui := cli.NewMockUi()
c := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Setup Vault *external
ve := &external{}
ve.tokenVal = v.RootToken
ve.setAddr(v.HTTPAddr)
if ve.ssl {
ve.tls = &api.TLSConfig{}
}
// Set global client
c.vault = ve
// Set capture directory
testDir := t.TempDir()
defer os.Remove(testDir)
c.collectDir = testDir
// Collect data from Vault
err := c.collectVault("test", "")
require.NoError(t, err)
require.Empty(t, ui.ErrorWriter.String())
require.FileExists(t, filepath.Join(testDir, "test", "vault-sys-health.json"))
}
// TestDebug_RedirectError asserts that redirect errors are detected so they
// can be translated into more understandable output.
func TestDebug_RedirectError(t *testing.T) {
ci.Parallel(t)
// Create a test server that always returns the error many versions of
// Nomad return instead of a 404 for unknown paths.
// 1st request redirects to /ui/
// 2nd request returns UI's HTML
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.String(), "/ui/") {
fmt.Fprintln(w, `<html>Fake UI HTML</html>`)
return
}
w.Header().Set("Location", "/ui/")
w.WriteHeader(307)
fmt.Fprintln(w, `<a href="/ui/">Temporary Redirect</a>.`)
}))
defer ts.Close()
config := api.DefaultConfig()
config.Address = ts.URL
client, err := api.NewClient(config)
require.NoError(t, err)
resp, err := client.Agent().Host("abc", "", nil)
assert.Nil(t, resp)
assert.True(t, isRedirectError(err), err.Error())
}
// TestDebug_StaleLeadership verifies that APIs that are required to
// complete a debug run have their query options configured with the
// -stale flag
func TestDebug_StaleLeadership(t *testing.T) {
srv, _, url := testServerWithoutLeader(t, false, nil)
addrServer := srv.HTTPAddr()
t.Logf("testAgent api address: %s", url)
t.Logf("Server api address: %s", addrServer)
var cases = testCases{
{
name: "no leader without stale flag",
args: []string{"-address", addrServer,
"-duration", "250ms", "-interval", "250ms",
"-server-id", "all", "-node-id", "all"},
expectedCode: 1,
expectedError: "No cluster leader",
},
{
name: "no leader with stale flag",
args: []string{
"-address", addrServer,
"-duration", "250ms", "-interval", "250ms",
"-server-id", "all", "-node-id", "all",
"-stale"},
expectedCode: 0,
expectedOutputs: []string{"Created debug archive"},
expectedError: "No node(s) with prefix", // still exits 0
},
}
runTestCases(t, cases)
}
func testServerWithoutLeader(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
// Make a new test server
a := agent.NewTestAgent(t, t.Name(), func(config *agent.Config) {
config.Client.Enabled = runClient
config.Server.Enabled = true
config.Server.NumSchedulers = pointer.Of(0)
config.Server.BootstrapExpect = 3
if cb != nil {
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
c := a.Client()
return a, c, a.HTTPAddr()
}
// testOutput is used to receive test output from a channel
type testOutput struct {
name string
code int
output string
error string
}
func TestDebug_EventStream_TopicsFromString(t *testing.T) {
ci.Parallel(t)
cases := []struct {
name string
topicList string
want map[api.Topic][]string
}{
{
name: "topics = all",
topicList: "all",
want: allTopics(),
},
{
name: "topics = none",
topicList: "none",
want: nil,
},
{
name: "two topics",
topicList: "Deployment,Job",
want: map[api.Topic][]string{
"Deployment": {"*"},
"Job": {"*"},
},
},
{
name: "multiple topics and filters (using api const)",
topicList: "Evaluation:example,Job:*,Node:*",
want: map[api.Topic][]string{
api.TopicEvaluation: {"example"},
api.TopicJob: {"*"},
api.TopicNode: {"*"},
},
},
{
name: "capitalize topics",
topicList: "evaluation:example,job:*,node:*",
want: map[api.Topic][]string{
api.TopicEvaluation: {"example"},
api.TopicJob: {"*"},
api.TopicNode: {"*"},
},
},
{
name: "all topics for filterKey",
topicList: "*:example",
want: map[api.Topic][]string{
"*": {"example"},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got, err := topicsFromString(tc.topicList)
require.NoError(t, err)
require.Equal(t, tc.want, got)
})
}
}
func TestDebug_EventStream(t *testing.T) {
ci.Parallel(t)
// TODO dmay: specify output directory to allow inspection of eventstream.json
// TODO dmay: require specific events in the eventstream.json file(s)
// TODO dmay: scenario where no events are expected, verify "No events captured"
// TODO dmay: verify event topic filtering only includes expected events
start := time.Now()
// Start test server
srv, client, url := testServer(t, true, nil)
t.Logf("%s: test server started, waiting for leadership to establish\n", time.Since(start))
// Ensure leader is ready
testutil.WaitForLeader(t, srv.Agent.RPC)
t.Logf("%s: Leadership established\n", time.Since(start))
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Return command output back to the main test goroutine
chOutput := make(chan testOutput)
// Set duration for capture
duration := 5 * time.Second
// Fail with timeout if duration is exceeded by 5 seconds
timeout := duration + 5*time.Second
// Run debug in a goroutine so we can start the capture before we run the test job
t.Logf("%s: Starting nomad operator debug in goroutine\n", time.Since(start))
go func() {
code := cmd.Run([]string{"-address", url, "-duration", duration.String(), "-interval", "5s", "-event-topic", "Job:*"})
assert.Equal(t, 0, code)
chOutput <- testOutput{
name: "yo",
code: code,
output: ui.OutputWriter.String(),
error: ui.ErrorWriter.String(),
}
}()
// Start test job
t.Logf("%s: Running test job\n", time.Since(start))
job := testJob("event_stream_test")
resp, _, err := client.Jobs().Register(job, nil)
t.Logf("%s: Test job started\n", time.Since(start))
// Ensure job registered
require.NoError(t, err)
// Wait for the job to complete
if code := waitForSuccess(ui, client, fullId, t, resp.EvalID); code != 0 {
switch code {
case 1:
t.Fatalf("status code 1: All other failures (API connectivity, internal errors, etc)\n")
case 2:
t.Fatalf("status code 2: Problem scheduling job (impossible constraints, resources exhausted, etc)\n")
default:
t.Fatalf("status code non zero saw %d\n", code)
}
}
t.Logf("%s: test job is complete, eval id: %s\n", time.Since(start), resp.EvalID)
// Capture the output struct from nomad operator debug goroutine
var testOut testOutput
select {
case testOut = <-chOutput:
t.Logf("%s: goroutine is complete", time.Since(start))
case <-time.After(timeout):
t.Fatalf("timed out waiting for event stream event (duration: %s, timeout: %s", duration, timeout)
}
t.Logf("Values from struct -- code: %d, len(out): %d, len(outerr): %d\n", testOut.code, len(testOut.output), len(testOut.error))
require.Empty(t, testOut.error)
archive := extractArchiveName(testOut.output)
require.NotEmpty(t, archive)
fmt.Println(archive)
// TODO dmay: verify evenstream.json output file contains expected content
}
// extractArchiveName searches string s for the archive filename
func extractArchiveName(captureOutput string) string {
file := ""
r := regexp.MustCompile(`Created debug archive: (.+)?\n`)
res := r.FindStringSubmatch(captureOutput)
// If found, there will be 2 elements, where element [1] is the desired text from the submatch
if len(res) == 2 {
file = res[1]
}
return file
}