open-nomad/command/operator_debug_test.go

1074 lines
31 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package command
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"regexp"
"strings"
"testing"
"time"
consulapi "github.com/hashicorp/consul/api"
consultest "github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
clienttest "github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/testutil"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// NOTE: most of these tests cannot be run in parallel
// testCase describes a single invocation of the operator debug command and
// the results runTestCases asserts for it.
type testCase struct {
// name is used as the subtest name.
name string
// args are the command-line arguments passed to OperatorDebugCommand.Run.
args []string
// expectedCode is the exit code Run is expected to return.
expectedCode int
// expectedOutputs are substrings that must all appear in the UI output.
// When empty, the UI output is required to be empty.
expectedOutputs []string
// expectedError is a substring that must appear in the UI error output.
// When empty, the UI error output is required to be empty.
expectedError string
}
// testCases is the list of testCase values consumed by runTestCases.
type testCases []testCase
// runTestCases runs each case as its own subtest. For every case a fresh
// OperatorDebugCommand backed by a mock UI is executed with the case's
// arguments, after which the exit code, the UI output and the UI error output
// are checked against the case's expectations.
func runTestCases(t *testing.T, cases testCases) {
	t.Helper()

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mockUI := cli.NewMockUi()
			debugCmd := &OperatorDebugCommand{Meta: Meta{Ui: mockUI}}

			exitCode := debugCmd.Run(tc.args)
			stdout := mockUI.OutputWriter.String()
			stderr := mockUI.ErrorWriter.String()

			assert.Equalf(t, tc.expectedCode, exitCode, "did not get expected exit code")

			// Output: either nothing is expected, or every expected
			// fragment must be present in non-empty output.
			switch {
			case len(tc.expectedOutputs) == 0:
				assert.Empty(t, stdout, "command output should have been empty")
			case assert.NotEmpty(t, stdout, "command output was empty"):
				for _, want := range tc.expectedOutputs {
					assert.Contains(t, stdout, want, "did not get expected output")
				}
			}

			// Error output: a specific fragment, or nothing at all.
			if tc.expectedError != "" {
				assert.Containsf(t, stderr, tc.expectedError, "did not get expected error")
				return
			}
			assert.Empty(t, stderr, "got unexpected error")
		})
	}
}
// newClientAgentConfigFunc returns an agent config callback that turns the
// agent into a client-only node: server mode is disabled, client mode is
// enabled, and the client is pointed at srvRPCAddr with the given node class.
// An empty region is replaced with "global".
func newClientAgentConfigFunc(region string, nodeClass string, srvRPCAddr string) func(*agent.Config) {
	effectiveRegion := region
	if effectiveRegion == "" {
		effectiveRegion = "global"
	}

	return func(cfg *agent.Config) {
		cfg.Region = effectiveRegion

		cfg.Server.Enabled = false

		cfg.Client.Enabled = true
		cfg.Client.NodeClass = nodeClass
		cfg.Client.Servers = []string{srvRPCAddr}
	}
}
// TestDebug_NodeClass starts one server and three clients (two in classA, one
// in classB) and checks that the -node-class filter combined with -max-nodes
// selects the expected number of clients.
func TestDebug_NodeClass(t *testing.T) {
	// Bring up the server and wait until it has a leader.
	srv, _, url := testServer(t, false, nil)
	testutil.WaitForLeader(t, srv.Agent.RPC)

	// Clients join through the server's advertised RPC address.
	srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
	t.Logf("Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)

	testClient(t, "client1", newClientAgentConfigFunc("global", "classA", srvRPCAddr))
	testClient(t, "client2", newClientAgentConfigFunc("global", "classB", srvRPCAddr))
	testClient(t, "client3", newClientAgentConfigFunc("global", "classA", srvRPCAddr))

	// argsForClass builds the command line shared by both cases; only the
	// node class differs.
	argsForClass := func(nodeClass string) []string {
		return []string{
			"-address", url,
			"-duration", "250ms",
			"-interval", "250ms",
			"-server-id", "all",
			"-node-id", "all",
			"-node-class", nodeClass,
			"-max-nodes", "2",
		}
	}

	runTestCases(t, testCases{
		{
			name:         "address=api, node-class=classA, max-nodes=2",
			args:         argsForClass("classA"),
			expectedCode: 0,
			expectedOutputs: []string{
				"Servers: (1/1)",
				"Clients: (2/3)",
				"Max node count reached (2)",
				"Node Class: classA",
				"Created debug archive",
			},
		},
		{
			name:         "address=api, node-class=classB, max-nodes=2",
			args:         argsForClass("classB"),
			expectedCode: 0,
			expectedOutputs: []string{
				"Servers: (1/1)",
				"Clients: (1/3)",
				"Node Class: classB",
				"Created debug archive",
			},
		},
	})
}
// TestDebug_ClientToServer runs the debug command against the test agent URL,
// the server's HTTP address and a client's HTTP address, and expects an
// archive to be created in every case.
func TestDebug_ClientToServer(t *testing.T) {
	// Bring up the server and wait until it has a leader.
	srv, _, url := testServer(t, false, nil)
	testutil.WaitForLeader(t, srv.Agent.RPC)

	// The client joins through the server's advertised RPC address.
	srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
	t.Logf("Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)

	agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc("", "", srvRPCAddr))

	addrServer := srv.HTTPAddr()
	addrClient1 := agent1.HTTPAddr()

	t.Logf("testAgent api address: %s", url)
	t.Logf("Server api address: %s", addrServer)
	t.Logf("Client1 api address: %s", addrClient1)

	// Every case uses the same flags; only the target address changes.
	argsFor := func(address string) []string {
		return []string{
			"-address", address,
			"-duration", "250ms",
			"-interval", "250ms",
			"-server-id", "all",
			"-node-id", "all",
		}
	}

	runTestCases(t, testCases{
		{
			name:            "testAgent api server",
			args:            argsFor(url),
			expectedCode:    0,
			expectedOutputs: []string{"Created debug archive"},
		},
		{
			name:            "server address",
			args:            argsFor(addrServer),
			expectedCode:    0,
			expectedOutputs: []string{"Created debug archive"},
		},
		{
			name:            "client1 address - verify no SIGSEGV panic",
			args:            argsFor(addrClient1),
			expectedCode:    0,
			expectedOutputs: []string{"Created debug archive"},
		},
	})
}
// TestDebug_MultiRegion starts a server and a client in each of two regions
// and runs the debug command against every agent address. With -region set,
// the output must list exactly the server and client of that region; an
// unknown region must fail with exit code 1.
func TestDebug_MultiRegion(t *testing.T) {
	region1 := "region1"
	region2 := "region2"

	// Start region1 server
	server1, _, addrServer1 := testServer(t, false, func(c *agent.Config) { c.Region = region1 })
	testutil.WaitForLeader(t, server1.Agent.RPC)
	rpcAddrServer1 := server1.GetConfig().AdvertiseAddrs.RPC
	t.Logf("%s: Leader started, HTTPAddr: %s, RPC: %s", region1, addrServer1, rpcAddrServer1)

	// Start region1 client
	agent1, _, addrClient1 := testClient(t, "client1", newClientAgentConfigFunc(region1, "", rpcAddrServer1))
	nodeIDClient1 := agent1.Agent.Client().NodeID()
	t.Logf("%s: Client1 started, ID: %s, HTTPAddr: %s", region1, nodeIDClient1, addrClient1)

	// Start region2 server
	server2, _, addrServer2 := testServer(t, false, func(c *agent.Config) { c.Region = region2 })
	testutil.WaitForLeader(t, server2.Agent.RPC)
	rpcAddrServer2 := server2.GetConfig().AdvertiseAddrs.RPC
	t.Logf("%s: Leader started, HTTPAddr: %s, RPC: %s", region2, addrServer2, rpcAddrServer2)

	// Start region2 client
	agent2, _, addrClient2 := testClient(t, "client2", newClientAgentConfigFunc(region2, "", rpcAddrServer2))
	nodeIDClient2 := agent2.Agent.Client().NodeID()
	t.Logf("%s: Client2 started, ID: %s, HTTPAddr: %s", region2, nodeIDClient2, addrClient2)

	t.Logf("Region: %s, Server1 api address: %s", region1, addrServer1)
	t.Logf("Region: %s, Client1 api address: %s", region1, addrClient1)
	t.Logf("Region: %s, Server2 api address: %s", region2, addrServer2)
	t.Logf("Region: %s, Client2 api address: %s", region2, addrClient2)

	// regionArgs builds the command line for a short capture against addr
	// restricted to region.
	regionArgs := func(addr, region string) []string {
		return []string{
			"-address", addr,
			"-region", region,
			"-duration", "50ms",
			"-interval", "50ms",
			"-server-id", "all",
			"-node-id", "all",
		}
	}

	// regionOutputs lists the output fragments expected when exactly one
	// server and the client with nodeID are captured in region.
	regionOutputs := func(region, nodeID string) []string {
		return []string{
			"Region: " + region + "\n",
			"Servers: (1/1) [TestDebug_MultiRegion." + region + "]",
			"Clients: (1/1) [" + nodeID + "]",
			"Created debug archive",
		}
	}

	// Setup test cases
	cases := testCases{
		// Good
		{
			name:            "no region - all servers, all clients",
			args:            []string{"-address", addrServer1, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
			expectedCode:    0,
			expectedOutputs: []string{"Starting debugger"},
		},
		{
			name:            "region1 - server1 address",
			args:            regionArgs(addrServer1, region1),
			expectedCode:    0,
			expectedOutputs: regionOutputs(region1, nodeIDClient1),
		},
		{
			name:            "region1 - client1 address",
			args:            regionArgs(addrClient1, region1),
			expectedCode:    0,
			expectedOutputs: regionOutputs(region1, nodeIDClient1),
		},
		{
			name:            "region2 - server2 address",
			args:            regionArgs(addrServer2, region2),
			expectedCode:    0,
			expectedOutputs: regionOutputs(region2, nodeIDClient2),
		},
		{
			name:            "region2 - client2 address",
			args:            regionArgs(addrClient2, region2),
			expectedCode:    0,
			expectedOutputs: regionOutputs(region2, nodeIDClient2),
		},

		// Bad
		{
			name:          "invalid region - all servers, all clients",
			args:          regionArgs(addrServer1, "never"),
			expectedCode:  1,
			expectedError: "500 (No path to region)",
		},
	}

	runTestCases(t, cases)
}
// TestDebug_SingleServer runs the debug command against a lone server, once
// with -server-id=leader and once with -server-id=all. Both runs must capture
// one server and zero clients, exit 0, and report the missing-node message on
// the error stream.
func TestDebug_SingleServer(t *testing.T) {
	srv, _, url := testServer(t, false, nil)
	testutil.WaitForLeader(t, srv.Agent.RPC)

	// singleServerCase builds a case for the given -server-id value; the
	// expectations are the same for both values.
	singleServerCase := func(name, serverID string) testCase {
		return testCase{
			name:         name,
			args:         []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", serverID},
			expectedCode: 0,
			expectedOutputs: []string{
				"Servers: (1/1)",
				"Clients: (0/0)",
				"Created debug archive",
			},
			expectedError: "No node(s) with prefix",
		}
	}

	runTestCases(t, testCases{
		singleServerCase("address=api, server-id=leader", "leader"),
		singleServerCase("address=api, server-id=all", "all"),
	})
}
// TestDebug_Failures feeds the debug command invalid arguments and checks that
// each invocation exits with code 1 and reports the expected error fragment.
func TestDebug_Failures(t *testing.T) {
	srv, _, url := testServer(t, false, nil)
	testutil.WaitForLeader(t, srv.Agent.RPC)

	failures := testCases{
		// Positional arguments are rejected.
		{
			name:          "fails incorrect args",
			args:          []string{"some", "bad", "args"},
			expectedCode:  1,
			expectedError: `This command takes no arguments`,
		},
		// Node ID problems.
		{
			name:          "Fails illegal node ids",
			args:          []string{"-node-id", "foo:bar"},
			expectedCode:  1,
			expectedError: `Error querying node info`,
		},
		{
			name:          "Fails missing node ids",
			args:          []string{"-node-id", "abc,def", "-duration", "250ms", "-interval", "250ms"},
			expectedCode:  1,
			expectedError: `Error querying node info`,
		},
		// Duration and interval parsing.
		{
			name:          "Fails bad durations",
			args:          []string{"-duration", "foo"},
			expectedCode:  1,
			expectedError: `Error parsing duration: foo: time: invalid duration "foo"`,
		},
		{
			name:          "Fails bad intervals",
			args:          []string{"-interval", "bar"},
			expectedCode:  1,
			expectedError: `Error parsing interval: bar: time: invalid duration "bar"`,
		},
		{
			name:          "Fails intervals greater than duration",
			args:          []string{"-duration", "5m", "-interval", "10m"},
			expectedCode:  1,
			expectedError: `Error parsing interval: 10m is greater than duration 5m`,
		},
		// Pprof duration and interval parsing.
		{
			name:          "Fails bad pprof duration",
			args:          []string{"-pprof-duration", "baz"},
			expectedCode:  1,
			expectedError: `Error parsing pprof duration: baz: time: invalid duration "baz"`,
		},
		{
			name:          "Fails bad pprof interval",
			args:          []string{"-pprof-interval", "bar"},
			expectedCode:  1,
			expectedError: `Error parsing pprof-interval: bar: time: invalid duration "bar"`,
		},
		// A mangled agent address.
		{
			name:          "Fails bad address",
			args:          []string{"-address", url + "bogus"},
			expectedCode:  1,
			expectedError: `invalid address`,
		},
	}

	runTestCases(t, failures)
}
// TestDebug_Bad_CSIPlugin_Names registers CSI plugins whose IDs contain
// characters that are awkward in file names and verifies that the debug
// command still writes a per-plugin JSON file (under the cleaned name) without
// tripping the capture-directory escape check.
func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
	// Start test server and API client
	srv, _, url := testServer(t, false, nil)

	// Wait for leadership to establish
	testutil.WaitForLeader(t, srv.Agent.RPC)

	cases := []string{
		"aws/ebs",
		"gcp-*-1",
	}
	for _, pluginName := range cases {
		cleanup := state.CreateTestCSIPlugin(srv.Agent.Server().State(), pluginName)
		// Deferred to function exit on purpose: the plugins must stay
		// registered while the debug command below runs.
		defer cleanup()
	}

	// Setup mock UI
	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// t.TempDir removes the directory and its contents when the test ends,
	// so no explicit cleanup is needed.
	testDir := t.TempDir()

	// Debug on the leader and all client nodes
	code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "leader", "-node-id", "all", "-output", testDir})
	assert.Equal(t, 0, code)

	// Bad plugin name should be escaped before it reaches the sandbox test
	require.NotContains(t, ui.ErrorWriter.String(), "file path escapes capture directory")
	require.Contains(t, ui.OutputWriter.String(), "Starting debugger")

	// Each plugin is expected in the first interval directory under its
	// cleaned file name.
	collectDir := cmd.collectDir
	pluginFiles := make([]string, 0, len(cases))
	for _, pluginName := range cases {
		pluginFile := fmt.Sprintf("csi-plugin-id-%s.json", helper.CleanFilename(pluginName, "_"))
		pluginFiles = append(pluginFiles, filepath.Join(collectDir, intervalDir, "0000", pluginFile))
	}

	testutil.WaitForFiles(t, pluginFiles)
}
// buildPathSlice joins path with every entry of files and returns the
// resulting paths in the same order. The result is an empty, non-nil slice
// when files is empty.
func buildPathSlice(path string, files []string) []string {
	joined := make([]string, len(files))
	for i := range files {
		joined[i] = filepath.Join(path, files[i])
	}
	return joined
}
// TestDebug_CapturedFiles runs a debug capture against a combined
// server/client agent and verifies that the expected cluster, client, server
// and per-interval files appear in the capture directory.
func TestDebug_CapturedFiles(t *testing.T) {
	srv, _, url := testServer(t, true, nil)
	testutil.WaitForLeader(t, srv.Agent.RPC)

	serverNodeName := srv.Config.NodeName
	region := srv.Config.Region
	serverName := fmt.Sprintf("%s.%s", serverNodeName, region)
	clientID := srv.Agent.Client().NodeID()
	testutil.WaitForClient(t, srv.Agent.Client().RPC, clientID, srv.Agent.Client().Region())

	t.Logf("serverName: %s, clientID: %s", serverName, clientID)

	// Setup file slices
	clusterFiles := []string{
		"agent-self.json",
		"members.json",
		"namespaces.json",
		"regions.json",
	}

	pprofFiles := []string{
		"allocs.prof",
		"goroutine-debug1.txt",
		"goroutine-debug2.txt",
		"goroutine.prof",
		"heap.prof",
		"profile_0000.prof",
		"threadcreate.prof",
		"trace.prof",
	}

	clientFiles := []string{
		"agent-host.json",
		"monitor.log",
	}
	clientFiles = append(clientFiles, pprofFiles...)

	serverFiles := []string{
		"agent-host.json",
		"monitor.log",
	}
	serverFiles = append(serverFiles, pprofFiles...)

	intervalFiles := []string{
		"allocations.json",
		"csi-plugins.json",
		"csi-volumes.json",
		"deployments.json",
		"evaluations.json",
		"jobs.json",
		"license.json",
		"metrics.json",
		"nodes.json",
		"operator-autopilot-health.json",
		"operator-raft.json",
		"operator-scheduler.json",
	}

	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// t.TempDir removes the directory and its contents when the test ends,
	// so no explicit cleanup is needed.
	testDir := t.TempDir()

	duration := 2 * time.Second
	interval := 750 * time.Millisecond
	waitTime := 2 * duration

	code := cmd.Run([]string{
		"-address", url,
		"-output", testDir,
		"-server-id", serverName,
		"-node-id", clientID,
		"-duration", duration.String(),
		"-interval", interval.String(),
	})

	// There should be no errors
	require.Empty(t, ui.ErrorWriter.String())
	require.Equal(t, 0, code)

	// Each group of files is awaited in its own capture sub-directory. With
	// a 2s duration and a 750ms interval at least two interval directories
	// (0000 and 0001) are expected.
	checks := []struct {
		label string
		dir   string
		files []string
	}{
		{"cluster", cmd.path(clusterDir), clusterFiles},
		{"client", cmd.path(clientDir, clientID), clientFiles},
		{"server", cmd.path(serverDir, serverName), serverFiles},
		{"interval 0000", cmd.path(intervalDir, "0000"), intervalFiles},
		{"interval 0001", cmd.path(intervalDir, "0001"), intervalFiles},
	}
	for _, check := range checks {
		t.Logf("Waiting for %s files in path: %s", check.label, check.dir)
		testutil.WaitForFilesUntil(t, buildPathSlice(check.dir, check.files), waitTime)
	}
}
// TestDebug_ExistingOutput pre-creates a timestamped "nomad-debug-..."
// directory inside the output directory and expects the debug command to exit
// with code 2.
func TestDebug_ExistingOutput(t *testing.T) {
	ci.Parallel(t)
	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// Fails existing output
	// NOTE(review): this presumably relies on the command deriving the same
	// second-resolution timestamp for its capture directory; a clock tick
	// between here and cmd.Run could make the names differ - confirm.
	format := "2006-01-02-150405Z"
	stamped := "nomad-debug-" + time.Now().UTC().Format(format)

	// t.TempDir removes the directory and its contents when the test ends,
	// so no explicit cleanup is needed.
	tempDir := t.TempDir()
	path := filepath.Join(tempDir, stamped)

	// The test is meaningless if the colliding directory was not created.
	require.NoError(t, os.MkdirAll(path, 0755))

	code := cmd.Run([]string{"-output", tempDir, "-duration", "50ms", "-interval", "50ms"})
	require.Equal(t, 2, code)
}
func TestDebug_Fail_Pprof(t *testing.T) {
// Setup agent config with debug endpoints disabled
agentConfFunc := func(c *agent.Config) {
c.EnableDebug = false
}
// Start test server and API client
srv, _, url := testServer(t, false, agentConfFunc)
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
// Setup mock UI
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Debug on server with endpoints disabled
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"})
assert.Equal(t, 0, code) // Pprof failure isn't fatal
require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
require.Contains(t, ui.ErrorWriter.String(), "Failed to retrieve pprof") // Should report pprof failure
require.Contains(t, ui.ErrorWriter.String(), "Permission denied") // Specifically permission denied
require.Contains(t, ui.OutputWriter.String(), "Created debug archive") // Archive should be generated anyway
}
// TestDebug_PprofVersionCheck exercises checkVersion with
// minimumVersionPprofConstraint. Versions paired with an empty message must be
// accepted; the others must be rejected with exactly the listed error (the
// "unsupported version" range reported for 0.11.1 and 0.11.2, or a malformed
// version error).
func TestDebug_PprofVersionCheck(t *testing.T) {
	table := []struct {
		version string
		errMsg  string
	}{
		{version: "0.8.7"},
		{version: "0.11.1", errMsg: "unsupported version=0.11.1 matches version filter >= 0.11.0, <= 0.11.2"},
		{version: "0.11.2", errMsg: "unsupported version=0.11.2 matches version filter >= 0.11.0, <= 0.11.2"},
		{version: "0.11.2+ent", errMsg: "unsupported version=0.11.2+ent matches version filter >= 0.11.0, <= 0.11.2"},
		{version: "0.11.3"},
		{version: "0.11.3+ent"},
		{version: "0.12.0"},
		{version: "1.3.0"},
		{version: "foo.bar", errMsg: "error: Malformed version: foo.bar"},
	}

	for _, entry := range table {
		t.Run(entry.version, func(t *testing.T) {
			gotErr := checkVersion(entry.version, minimumVersionPprofConstraint)

			if entry.errMsg != "" {
				require.EqualError(t, gotErr, entry.errMsg)
				return
			}
			require.NoError(t, gotErr, "expected no error from %s", entry.version)
		})
	}
}
// TestDebug_StringToSlice checks that stringToSlice splits on commas, trims
// surrounding whitespace, drops empty entries, and yields a nil slice when
// nothing remains.
func TestDebug_StringToSlice(t *testing.T) {
	ci.Parallel(t)

	table := []struct {
		input    string
		expected []string
	}{
		// Inputs with no usable entries produce a nil slice.
		{input: ",,", expected: nil},
		{input: "", expected: nil},
		// Whitespace is trimmed and empty entries are skipped.
		{input: "foo, bar", expected: []string{"foo", "bar"}},
		{input: " foo, bar ", expected: []string{"foo", "bar"}},
		{input: "foo,,bar", expected: []string{"foo", "bar"}},
	}

	for i := range table {
		got := stringToSlice(table[i].input)
		require.Equal(t, table[i].expected, got)
	}
}
// TestDebug_External checks how external.addr combines the configured address
// with the ssl flag: an address that already carries a scheme is returned
// unchanged, while a bare host:port gets "https://" or "http://" according to
// ssl.
func TestDebug_External(t *testing.T) {
	ci.Parallel(t)

	// address calculation honors CONSUL_HTTP_SSL
	scenarios := []struct {
		desc    string
		addrVal string
		ssl     bool
		want    string
	}{
		{"ssl: true - correct alignment", "https://127.0.0.1:8500", true, "https://127.0.0.1:8500"},
		// NOTE: Address with protocol now overrides ssl flag
		{"ssl: true - protocol incorrect", "http://127.0.0.1:8500", true, "http://127.0.0.1:8500"},
		{"ssl: true - protocol missing", "127.0.0.1:8500", true, "https://127.0.0.1:8500"},
		{"ssl: false - correct alignment", "http://127.0.0.1:8500", false, "http://127.0.0.1:8500"},
		// NOTE: Address with protocol now overrides ssl flag
		{"ssl: false - protocol incorrect", "https://127.0.0.1:8500", false, "https://127.0.0.1:8500"},
		{"ssl: false - protocol missing", "127.0.0.1:8500", false, "http://127.0.0.1:8500"},
		// Address through proxy might not have a port
		{"ssl: true - no port", "https://127.0.0.1", true, "https://127.0.0.1"},
	}

	for _, sc := range scenarios {
		ext := &external{addrVal: sc.addrVal, ssl: sc.ssl}
		require.Equal(t, sc.want, ext.addr("foo"), sc.desc)
	}
}
// TestDebug_WriteBytes_Nil verifies that writeBytes accepts a nil byte slice
// and still creates the file at the top level of the collect directory.
func TestDebug_WriteBytes_Nil(t *testing.T) {
	ci.Parallel(t)

	var testBytes []byte // deliberately nil

	// Setup mock UI
	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// t.TempDir removes the directory and its contents when the test ends,
	// so no explicit cleanup is needed.
	testDir := t.TempDir()
	cmd.collectDir = testDir

	testFile := "test_nil.json"
	testPath := filepath.Join(testDir, testFile)

	// Write nil file at top level of collect directory
	err := cmd.writeBytes("", testFile, testBytes)
	require.NoError(t, err)
	require.FileExists(t, testPath)
}
// TestDebug_WriteBytes_PathEscapesSandbox verifies that writeBytes returns an
// error when the collect directory is empty, since any target directory then
// appears to lie outside of it.
func TestDebug_WriteBytes_PathEscapesSandbox(t *testing.T) {
	ci.Parallel(t)

	var testBytes []byte

	// t.TempDir removes the directory and anything written into it when the
	// test ends, so no explicit cleanup is needed.
	testDir := t.TempDir()
	testFile := "testing.json"

	// Setup mock UI
	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// Empty collectDir will always appear to be escaped
	cmd.collectDir = ""
	err := cmd.writeBytes(testDir, testFile, testBytes)
	require.Error(t, err)
}
// TestDebug_CollectConsul starts an embedded Consul test server, points the
// debug command's Consul settings at it, and verifies that collectConsul
// writes the expected consul-*.json files without reporting errors. The test
// is skipped in -short mode.
func TestDebug_CollectConsul(t *testing.T) {
	ci.Parallel(t)
	if testing.Short() {
		t.Skip("-short set; skipping")
	}

	// Skip test if Consul binary cannot be found
	clienttest.RequireConsul(t)

	// Create an embedded Consul server
	testconsul, err := consultest.NewTestServerConfigT(t, func(c *consultest.TestServerConfig) {
		c.Peering = nil // fix for older versions of Consul (<1.13.0) that don't support peering
		// If -v wasn't specified squelch consul logging
		if !testing.Verbose() {
			c.Stdout = io.Discard
			c.Stderr = io.Discard
		}
	})
	// require.NoError aborts the test on failure, so no further error
	// handling is needed before using testconsul.
	require.NoError(t, err, "error starting test consul server")
	defer testconsul.Stop()

	consulConfig := consulapi.DefaultConfig()
	consulConfig.Address = testconsul.HTTPAddr

	// Setup mock UI
	ui := cli.NewMockUi()
	c := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// Setup Consul *external
	ce := &external{}
	ce.setAddr(consulConfig.Address)
	if ce.ssl {
		ce.tls = &api.TLSConfig{}
	}

	// Set global client
	c.consul = ce

	// Setup capture directory. t.TempDir removes it and its contents when
	// the test ends, so no explicit cleanup is needed.
	testDir := t.TempDir()
	c.collectDir = testDir

	// Collect data from Consul into folder "test"
	c.collectConsul("test")

	require.Empty(t, ui.ErrorWriter.String())
	require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-host.json"))
	require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-members.json"))
	require.FileExists(t, filepath.Join(testDir, "test", "consul-agent-metrics.json"))
	require.FileExists(t, filepath.Join(testDir, "test", "consul-leader.json"))
}
// TestDebug_CollectVault starts a test Vault server, points the debug
// command's Vault settings at it, and verifies that collectVault writes
// vault-sys-health.json without reporting errors. The test is skipped in
// -short mode.
func TestDebug_CollectVault(t *testing.T) {
	ci.Parallel(t)
	if testing.Short() {
		t.Skip("-short set; skipping")
	}

	// Skip test if Vault binary cannot be found
	clienttest.RequireVault(t)

	// Create a Vault server
	v := testutil.NewTestVault(t)
	defer v.Stop()

	// Setup mock UI
	ui := cli.NewMockUi()
	c := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// Setup Vault *external
	ve := &external{}
	ve.tokenVal = v.RootToken
	ve.setAddr(v.HTTPAddr)
	if ve.ssl {
		ve.tls = &api.TLSConfig{}
	}

	// Set global client
	c.vault = ve

	// Set capture directory. t.TempDir removes it and its contents when the
	// test ends, so no explicit cleanup is needed.
	testDir := t.TempDir()
	c.collectDir = testDir

	// Collect data from Vault
	err := c.collectVault("test", "")

	require.NoError(t, err)
	require.Empty(t, ui.ErrorWriter.String())
	require.FileExists(t, filepath.Join(testDir, "test", "vault-sys-health.json"))
}
// TestDebug_RedirectError asserts that redirect errors are detected so they
// can be translated into more understandable output.
func TestDebug_RedirectError(t *testing.T) {
	ci.Parallel(t)

	// Create a test server that always returns the error many versions of
	// Nomad return instead of a 404 for unknown paths.
	// 1st request redirects to /ui/
	// 2nd request returns UI's HTML
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if strings.HasSuffix(r.URL.String(), "/ui/") {
			fmt.Fprintln(w, `<html>Fake UI HTML</html>`)
			return
		}

		w.Header().Set("Location", "/ui/")
		w.WriteHeader(http.StatusTemporaryRedirect)
		fmt.Fprintln(w, `<a href="/ui/">Temporary Redirect</a>.`)
	}))
	defer ts.Close()

	config := api.DefaultConfig()
	config.Address = ts.URL
	client, err := api.NewClient(config)
	require.NoError(t, err)

	resp, err := client.Agent().Host("abc", "", nil)
	assert.Nil(t, resp)

	// Stop here if no error came back: err.Error() below is evaluated as an
	// argument and would panic on a nil error instead of failing cleanly.
	require.Error(t, err)
	assert.True(t, isRedirectError(err), err.Error())
}
// TestDebug_StaleLeadership runs the debug command against a server that has
// no leader. Without -stale the run must fail with "No cluster leader"; with
// -stale it must exit 0 and still create an archive, showing that the APIs
// needed to complete a debug run honour the -stale flag.
func TestDebug_StaleLeadership(t *testing.T) {
	srv, _, url := testServerWithoutLeader(t, false, nil)
	addrServer := srv.HTTPAddr()

	t.Logf("testAgent api address: %s", url)
	t.Logf("Server api address: %s", addrServer)

	// leaderlessArgs returns a fresh argument list for the leaderless
	// server, followed by any extra flags.
	leaderlessArgs := func(extra ...string) []string {
		base := []string{
			"-address", addrServer,
			"-duration", "250ms", "-interval", "250ms",
			"-server-id", "all", "-node-id", "all",
		}
		return append(base, extra...)
	}

	runTestCases(t, testCases{
		{
			name:          "no leader without stale flag",
			args:          leaderlessArgs(),
			expectedCode:  1,
			expectedError: "No cluster leader",
		},
		{
			name:            "no leader with stale flag",
			args:            leaderlessArgs("-stale"),
			expectedCode:    0,
			expectedOutputs: []string{"Created debug archive"},
			expectedError:   "No node(s) with prefix", // still exits 0
		},
	})
}
// testServerWithoutLeader starts a test agent in server mode with zero
// schedulers and BootstrapExpect set to 3 (presumably so that a lone server
// never elects itself leader - verify against the agent package). The client
// is enabled only when runClient is true, and cb, if non-nil, may adjust the
// configuration further. The agent is shut down automatically when the test
// finishes. It returns the agent, its API client and its HTTP address.
func testServerWithoutLeader(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
	configure := func(config *agent.Config) {
		config.Client.Enabled = runClient
		config.Server.Enabled = true
		config.Server.NumSchedulers = pointer.Of(0)
		config.Server.BootstrapExpect = 3

		// Caller overrides are applied last.
		if cb == nil {
			return
		}
		cb(config)
	}

	testAgent := agent.NewTestAgent(t, t.Name(), configure)
	t.Cleanup(func() { testAgent.Shutdown() })

	return testAgent, testAgent.Client(), testAgent.HTTPAddr()
}
// testOutput is used to receive test output from a channel. It carries the
// result of a debug command run from the goroutine that executed it back to
// the test goroutine.
type testOutput struct {
// name is a free-form label for the run.
name string
// code is the exit code returned by the command's Run method.
code int
// output is the captured contents of the mock UI's output writer.
output string
// error is the captured contents of the mock UI's error writer.
error string
}
// TestDebug_EventStream_TopicsFromString checks that topicsFromString turns a
// comma-separated "Topic[:filter]" list into the expected topic-to-filter map,
// covering the "all" and "none" keywords, default "*" filters, lower-case
// topic names and a wildcard topic.
func TestDebug_EventStream_TopicsFromString(t *testing.T) {
	ci.Parallel(t)

	// Shared expectation for the two cases that differ only in the
	// capitalisation of their input.
	evalJobNode := map[api.Topic][]string{
		api.TopicEvaluation: {"example"},
		api.TopicJob:        {"*"},
		api.TopicNode:       {"*"},
	}

	table := []struct {
		desc     string
		input    string
		expected map[api.Topic][]string
	}{
		{desc: "topics = all", input: "all", expected: allTopics()},
		{desc: "topics = none", input: "none", expected: nil},
		{
			desc:  "two topics",
			input: "Deployment,Job",
			expected: map[api.Topic][]string{
				"Deployment": {"*"},
				"Job":        {"*"},
			},
		},
		{
			desc:     "multiple topics and filters (using api const)",
			input:    "Evaluation:example,Job:*,Node:*",
			expected: evalJobNode,
		},
		{
			desc:     "capitalize topics",
			input:    "evaluation:example,job:*,node:*",
			expected: evalJobNode,
		},
		{
			desc:  "all topics for filterKey",
			input: "*:example",
			expected: map[api.Topic][]string{
				"*": {"example"},
			},
		},
	}

	for _, entry := range table {
		t.Run(entry.desc, func(t *testing.T) {
			topics, err := topicsFromString(entry.input)
			require.NoError(t, err)
			require.Equal(t, entry.expected, topics)
		})
	}
}
// TestDebug_EventStream runs a debug capture with an event-topic filter in a
// background goroutine, registers a test job while the capture is in
// progress, and then verifies that the capture finished without error output
// and reported an archive name.
func TestDebug_EventStream(t *testing.T) {
	ci.Parallel(t)

	// TODO dmay: specify output directory to allow inspection of eventstream.json
	// TODO dmay: require specific events in the eventstream.json file(s)
	// TODO dmay: scenario where no events are expected, verify "No events captured"
	// TODO dmay: verify event topic filtering only includes expected events

	start := time.Now()

	// Start test server
	srv, client, url := testServer(t, true, nil)
	t.Logf("%s: test server started, waiting for leadership to establish\n", time.Since(start))

	// Ensure leader is ready
	testutil.WaitForLeader(t, srv.Agent.RPC)
	t.Logf("%s: Leadership established\n", time.Since(start))

	// Setup mock UI
	ui := cli.NewMockUi()
	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}

	// Return command output back to the main test goroutine. The channel is
	// buffered so the goroutine can always deliver its result and exit,
	// even if the test has already failed or timed out and nobody receives.
	chOutput := make(chan testOutput, 1)

	// Set duration for capture
	duration := 5 * time.Second
	// Fail with timeout if duration is exceeded by 5 seconds
	timeout := duration + 5*time.Second

	// Run debug in a goroutine so we can start the capture before we run the test job
	t.Logf("%s: Starting nomad operator debug in goroutine\n", time.Since(start))
	go func() {
		code := cmd.Run([]string{"-address", url, "-duration", duration.String(), "-interval", "5s", "-event-topic", "Job:*"})
		assert.Equal(t, 0, code)

		chOutput <- testOutput{
			name:   "yo",
			code:   code,
			output: ui.OutputWriter.String(),
			error:  ui.ErrorWriter.String(),
		}
	}()

	// Start test job
	t.Logf("%s: Running test job\n", time.Since(start))
	job := testJob("event_stream_test")
	resp, _, err := client.Jobs().Register(job, nil)
	t.Logf("%s: Test job started\n", time.Since(start))

	// Ensure job registered
	require.NoError(t, err)

	// Wait for the job to complete
	if code := waitForSuccess(ui, client, fullId, t, resp.EvalID); code != 0 {
		switch code {
		case 1:
			t.Fatalf("status code 1: All other failures (API connectivity, internal errors, etc)\n")
		case 2:
			t.Fatalf("status code 2: Problem scheduling job (impossible constraints, resources exhausted, etc)\n")
		default:
			t.Fatalf("status code non zero saw %d\n", code)
		}
	}
	t.Logf("%s: test job is complete, eval id: %s\n", time.Since(start), resp.EvalID)

	// Capture the output struct from nomad operator debug goroutine
	var testOut testOutput
	select {
	case testOut = <-chOutput:
		t.Logf("%s: goroutine is complete", time.Since(start))
	case <-time.After(timeout):
		t.Fatalf("timed out waiting for event stream event (duration: %s, timeout: %s)", duration, timeout)
	}

	t.Logf("Values from struct -- code: %d, len(out): %d, len(outerr): %d\n", testOut.code, len(testOut.output), len(testOut.error))

	require.Empty(t, testOut.error)

	archive := extractArchiveName(testOut.output)
	require.NotEmpty(t, archive)
	// Report through the test log rather than stdout so the line is tied to
	// this test and only shown with -v or on failure.
	t.Logf("archive: %s", archive)

	// TODO dmay: verify evenstream.json output file contains expected content
}
// extractArchiveName scans captureOutput for a "Created debug archive: <name>"
// line terminated by a newline and returns <name>. It returns the empty string
// when no such line is present.
func extractArchiveName(captureOutput string) string {
	archiveLine := regexp.MustCompile(`Created debug archive: (.+)?\n`)

	// A successful match yields the full match plus the single capture group.
	if groups := archiveLine.FindStringSubmatch(captureOutput); len(groups) == 2 {
		return groups[1]
	}
	return ""
}