debug: Improve namespace and region support (#11269)

* Include region and namespace in CLI output
* Add region and prefix matching for server members
* Add namespace and region API outputs to cluster metadata folder
* Add region awareness to WaitForClient helper function
* Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice
* Refactor test client agent generation
* Add tests for region
* Add changelog
This commit is contained in:
Dave May 2021-10-12 16:58:41 -04:00 committed by GitHub
parent 511cae92b4
commit 2d14c54fa0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 258 additions and 103 deletions

3
.changelog/11269.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
cli: Improve debug namespace and region support
```

View File

@ -329,7 +329,7 @@ func (c *OperatorDebugCommand) Run(args []string) int {
nodeLookupFailCount := 0
nodeCaptureCount := 0
for _, id := range argNodes(nodeIDs) {
for _, id := range stringToSlice(nodeIDs) {
if id == "all" {
// Capture from all nodes using empty prefix filter
id = ""
@ -382,15 +382,15 @@ func (c *OperatorDebugCommand) Run(args []string) int {
c.Ui.Error(fmt.Sprintf("Failed to retrieve server list; err: %v", err))
return 1
}
// Write complete list of server members to file
c.writeJSON("version", "members.json", members, err)
// We always write the error to the file, but don't range if no members found
if serverIDs == "all" && members != nil {
// Special case to capture from all servers
for _, member := range members.Members {
c.serverIDs = append(c.serverIDs, member.Name)
}
} else {
c.serverIDs = append(c.serverIDs, argNodes(serverIDs)...)
// Filter for servers matching criteria
c.serverIDs, err = filterServerMembers(members, serverIDs, c.region)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse server list; err: %v", err))
return 1
}
serversFound := 0
@ -412,6 +412,8 @@ func (c *OperatorDebugCommand) Run(args []string) int {
// Display general info about the capture
c.Ui.Output("Starting debugger...")
c.Ui.Output("")
c.Ui.Output(fmt.Sprintf(" Region: %s", c.region))
c.Ui.Output(fmt.Sprintf(" Namespace: %s", c.namespace))
c.Ui.Output(fmt.Sprintf(" Servers: (%d/%d) %v", serverCaptureCount, serversFound, c.serverIDs))
c.Ui.Output(fmt.Sprintf(" Clients: (%d/%d) %v", nodeCaptureCount, nodesFound, c.nodeIDs))
if nodeCaptureCount > 0 && nodeCaptureCount == c.maxNodes {
@ -468,6 +470,13 @@ func (c *OperatorDebugCommand) collect(client *api.Client) error {
self, err := client.Agent().Self()
c.writeJSON(dir, "agent-self.json", self, err)
var qo *api.QueryOptions
namespaces, _, err := client.Namespaces().List(qo)
c.writeJSON(dir, "namespaces.json", namespaces, err)
regions, err := client.Regions().List()
c.writeJSON(dir, "regions.json", regions, err)
// Fetch data directly from consul and vault. Ignore errors
var consul, vault string
@ -1055,8 +1064,46 @@ func TarCZF(archive string, src, target string) error {
})
}
// argNodes splits node ids from the command line by ","
func argNodes(input string) []string {
// filterServerMembers returns a slice of server member names matching the search criteria
func filterServerMembers(serverMembers *api.ServerMembers, serverIDs string, region string) (membersFound []string, err error) {
if serverMembers.Members == nil {
return nil, fmt.Errorf("Failed to parse server members, members==nil")
}
prefixes := stringToSlice(serverIDs)
// "leader" is a special case which Nomad handles in the API. If "leader"
// appears in serverIDs, add it to membersFound and remove it from the list
// so that it isn't processed by the range loop
if helper.SliceStringContains(prefixes, "leader") {
membersFound = append(membersFound, "leader")
helper.RemoveEqualFold(&prefixes, "leader")
}
for _, member := range serverMembers.Members {
// If region is provided it must match exactly
if region != "" && member.Tags["region"] != region {
continue
}
// Always include "all"
if serverIDs == "all" {
membersFound = append(membersFound, member.Name)
continue
}
// Include member if name matches any prefix from serverIDs
if helper.StringHasPrefixInSlice(member.Name, prefixes) {
membersFound = append(membersFound, member.Name)
}
}
return membersFound, nil
}
// stringToSlice splits comma-separated input string into slice, trims
// whitespace, and prunes empty values
func stringToSlice(input string) []string {
ns := strings.Split(input, ",")
var out []string
for _, n := range ns {

View File

@ -50,11 +50,23 @@ func runTestCases(t *testing.T, cases testCases) {
})
}
}
func newClientAgentConfigFunc(region string, nodeClass string, srvRPCAddr string) func(*agent.Config) {
if region == "" {
region = "global"
}
return func(c *agent.Config) {
c.Region = region
c.Client.NodeClass = nodeClass
c.Client.Servers = []string{srvRPCAddr}
c.Client.Enabled = true
c.Server.Enabled = false
}
}
func TestDebug_NodeClass(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
@ -63,81 +75,34 @@ func TestDebug_NodeClass(t *testing.T) {
srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
// Setup client 1 (nodeclass = clienta)
agentConfFunc1 := func(c *agent.Config) {
c.Region = "global"
c.Server.Enabled = false
c.Client.NodeClass = "clienta"
c.Client.Enabled = true
c.Client.Servers = []string{srvRPCAddr}
}
// Start client 1
client1 := agent.NewTestAgent(t, "client1", agentConfFunc1)
defer client1.Shutdown()
// Wait for client1 to connect
client1NodeID := client1.Agent.Client().NodeID()
testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID)
t.Logf("[TEST] Client1 ready, id: %s", client1NodeID)
// Setup client 2 (nodeclass = clientb)
agentConfFunc2 := func(c *agent.Config) {
c.Region = "global"
c.Server.Enabled = false
c.Client.NodeClass = "clientb"
c.Client.Enabled = true
c.Client.Servers = []string{srvRPCAddr}
}
// Start client 2
client2 := agent.NewTestAgent(t, "client2", agentConfFunc2)
defer client2.Shutdown()
// Wait for client2 to connect
client2NodeID := client2.Agent.Client().NodeID()
testutil.WaitForClient(t, srv.Agent.RPC, client2NodeID)
t.Logf("[TEST] Client2 ready, id: %s", client2NodeID)
// Setup client 3 (nodeclass = clienta)
agentConfFunc3 := func(c *agent.Config) {
c.Server.Enabled = false
c.Client.NodeClass = "clienta"
c.Client.Servers = []string{srvRPCAddr}
}
// Start client 3
client3 := agent.NewTestAgent(t, "client3", agentConfFunc3)
defer client3.Shutdown()
// Wait for client3 to connect
client3NodeID := client3.Agent.Client().NodeID()
testutil.WaitForClient(t, srv.Agent.RPC, client3NodeID)
t.Logf("[TEST] Client3 ready, id: %s", client3NodeID)
// Start test clients
testClient(t, "client1", newClientAgentConfigFunc("global", "classA", srvRPCAddr))
testClient(t, "client2", newClientAgentConfigFunc("global", "classB", srvRPCAddr))
testClient(t, "client3", newClientAgentConfigFunc("global", "classA", srvRPCAddr))
// Setup test cases
cases := testCases{
{
name: "address=api, node-class=clienta, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"},
name: "address=api, node-class=classA, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classA", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (2/3)",
"Max node count reached (2)",
"Node Class: clienta",
"Node Class: classA",
"Created debug archive",
},
expectedError: "",
},
{
name: "address=api, node-class=clientb, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"},
name: "address=api, node-class=classB, max-nodes=2",
args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classB", "-max-nodes", "2"},
expectedCode: 0,
expectedOutputs: []string{
"Servers: (1/1)",
"Clients: (1/3)",
"Node Class: clientb",
"Node Class: classB",
"Created debug archive",
},
expectedError: "",
@ -150,7 +115,6 @@ func TestDebug_NodeClass(t *testing.T) {
func TestDebug_ClientToServer(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
@ -159,27 +123,12 @@ func TestDebug_ClientToServer(t *testing.T) {
srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
// Setup client 1 (nodeclass = clienta)
agentConfFunc1 := func(c *agent.Config) {
c.Region = "global"
c.Server.Enabled = false
c.Client.NodeClass = "clienta"
c.Client.Enabled = true
c.Client.Servers = []string{srvRPCAddr}
}
// Start client 1
client1 := agent.NewTestAgent(t, "client1", agentConfFunc1)
defer client1.Shutdown()
// Wait for client 1 to connect
client1NodeID := client1.Agent.Client().NodeID()
testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID)
t.Logf("[TEST] Client1 ready, id: %s", client1NodeID)
// Start client
agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc("", "", srvRPCAddr))
// Get API addresses
addrServer := srv.HTTPAddr()
addrClient1 := client1.HTTPAddr()
addrClient1 := agent1.HTTPAddr()
t.Logf("[TEST] testAgent api address: %s", url)
t.Logf("[TEST] Server api address: %s", addrServer)
@ -210,9 +159,83 @@ func TestDebug_ClientToServer(t *testing.T) {
runTestCases(t, cases)
}
func TestDebug_ClientToServer_Region(t *testing.T) {
region := "testregion"
// Start test server and API client
srv, _, url := testServer(t, false, func(c *agent.Config) {
c.Region = region
})
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
// Retrieve server RPC address to join client
srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
// Start client
agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc(region, "", srvRPCAddr))
// Get API addresses
addrServer := srv.HTTPAddr()
addrClient1 := agent1.HTTPAddr()
t.Logf("[TEST] testAgent api address: %s", url)
t.Logf("[TEST] Server api address: %s", addrServer)
t.Logf("[TEST] Client1 api address: %s", addrClient1)
// Setup test cases
var cases = testCases{
// Good
{
name: "region - testAgent api server",
args: []string{"-address", url, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region + "\n",
"Servers: (1/1)",
"Clients: (1/1)",
"Created debug archive",
},
},
{
name: "region - server address",
args: []string{"-address", addrServer, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region + "\n",
"Servers: (1/1)",
"Clients: (1/1)",
"Created debug archive",
},
},
{
name: "region - client1 address - verify no SIGSEGV panic",
args: []string{"-address", addrClient1, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 0,
expectedOutputs: []string{
"Region: " + region + "\n",
"Servers: (1/1)",
"Clients: (1/1)",
"Created debug archive",
},
},
// Bad
{
name: "invalid region - all servers, all clients",
args: []string{"-address", url, "-region", "never", "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"},
expectedCode: 1,
expectedError: "500 (No path to region)",
},
}
runTestCases(t, cases)
}
func TestDebug_SingleServer(t *testing.T) {
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
testutil.WaitForLeader(t, srv.Agent.RPC)
var cases = testCases{
@ -245,7 +268,6 @@ func TestDebug_SingleServer(t *testing.T) {
func TestDebug_Failures(t *testing.T) {
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
testutil.WaitForLeader(t, srv.Agent.RPC)
var cases = testCases{
@ -293,7 +315,6 @@ func TestDebug_Failures(t *testing.T) {
func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
@ -334,7 +355,6 @@ func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
func TestDebug_CapturedFiles(t *testing.T) {
srv, _, url := testServer(t, false, nil)
defer srv.Shutdown()
testutil.WaitForLeader(t, srv.Agent.RPC)
ui := cli.NewMockUi()
@ -413,7 +433,6 @@ func TestDebug_Fail_Pprof(t *testing.T) {
// Start test server and API client
srv, _, url := testServer(t, false, agentConfFunc)
defer srv.Shutdown()
// Wait for leadership to establish
testutil.WaitForLeader(t, srv.Agent.RPC)
@ -422,7 +441,7 @@ func TestDebug_Fail_Pprof(t *testing.T) {
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
// Debug on client - node class = "clienta"
// Debug on server with endpoints disabled
code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"})
assert.Equal(t, 0, code) // Pprof failure isn't fatal
@ -432,15 +451,27 @@ func TestDebug_Fail_Pprof(t *testing.T) {
require.Contains(t, ui.OutputWriter.String(), "Created debug archive") // Archive should be generated anyway
}
func TestDebug_Utils(t *testing.T) {
func TestDebug_StringToSlice(t *testing.T) {
t.Parallel()
xs := argNodes("foo, bar")
require.Equal(t, []string{"foo", "bar"}, xs)
cases := []struct {
input string
expected []string
}{
{input: ",,", expected: []string(nil)},
{input: "", expected: []string(nil)},
{input: "foo, bar", expected: []string{"foo", "bar"}},
{input: " foo, bar ", expected: []string{"foo", "bar"}},
{input: "foo,,bar", expected: []string{"foo", "bar"}},
}
for _, tc := range cases {
out := stringToSlice(tc.input)
require.Equal(t, tc.expected, out)
}
}
xs = argNodes("")
require.Len(t, xs, 0)
require.Empty(t, xs)
func TestDebug_External(t *testing.T) {
t.Parallel()
// address calculation honors CONSUL_HTTP_SSL
// ssl: true - Correct alignment

View File

@ -6,6 +6,7 @@ import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/testutil"
)
func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
@ -23,6 +24,24 @@ func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.Te
return a, c, a.HTTPAddr()
}
// testClient starts a new test client, blocks until it joins, and performs
// cleanup after the test is complete.
func testClient(t *testing.T, name string, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
t.Logf("[TEST] Starting client agent %s", name)
a := agent.NewTestAgent(t, name, func(config *agent.Config) {
if cb != nil {
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
c := a.Client()
t.Logf("[TEST] Waiting for client %s to join server(s) %s", name, a.GetConfig().Client.Servers)
testutil.WaitForClient(t, a.Agent.RPC, a.Agent.Client().NodeID(), a.Agent.Client().Region())
return a, c, a.HTTPAddr()
}
func testJob(jobID string) *api.Job {
task := api.NewTask("task1", "mock_driver").
SetConfig("kill_after", "1s").

View File

@ -197,6 +197,26 @@ func SliceStringContains(list []string, item string) bool {
return false
}
// SliceStringHasPrefix returns true if any string in list starts with prefix
func SliceStringHasPrefix(list []string, prefix string) bool {
for _, s := range list {
if strings.HasPrefix(s, prefix) {
return true
}
}
return false
}
// StringHasPrefixInSlice returns true if string starts with any prefix in list
func StringHasPrefixInSlice(s string, prefixes []string) bool {
for _, prefix := range prefixes {
if strings.HasPrefix(s, prefix) {
return true
}
}
return false
}
func SliceSetDisjoint(first, second []string) (bool, []string) {
contained := make(map[string]struct{}, len(first))
for _, k := range first {

View File

@ -34,6 +34,34 @@ func TestSliceStringContains(t *testing.T) {
require.False(t, SliceStringContains(list, "d"))
}
func TestSliceStringHasPrefix(t *testing.T) {
list := []string{"alpha", "bravo", "charlie", "definitely", "most definitely"}
// At least one string in the slice above starts with the following test prefix strings
require.True(t, SliceStringHasPrefix(list, "a"))
require.True(t, SliceStringHasPrefix(list, "b"))
require.True(t, SliceStringHasPrefix(list, "c"))
require.True(t, SliceStringHasPrefix(list, "d"))
require.True(t, SliceStringHasPrefix(list, "mos"))
require.True(t, SliceStringHasPrefix(list, "def"))
require.False(t, SliceStringHasPrefix(list, "delta"))
}
func TestStringHasPrefixInSlice(t *testing.T) {
prefixes := []string{"a", "b", "c", "definitely", "most definitely"}
// The following strings all start with at least one prefix in the slice above
require.True(t, StringHasPrefixInSlice("alpha", prefixes))
require.True(t, StringHasPrefixInSlice("bravo", prefixes))
require.True(t, StringHasPrefixInSlice("charlie", prefixes))
require.True(t, StringHasPrefixInSlice("definitely", prefixes))
require.True(t, StringHasPrefixInSlice("most definitely", prefixes))
require.False(t, StringHasPrefixInSlice("mos", prefixes))
require.False(t, StringHasPrefixInSlice("def", prefixes))
require.False(t, StringHasPrefixInSlice("delta", prefixes))
}
func TestCompareTimePtrs(t *testing.T) {
t.Run("nil", func(t *testing.T) {
a := (*time.Duration)(nil)

View File

@ -548,7 +548,7 @@ func TestAgentProfile_RemoteClient(t *testing.T) {
})
defer cleanupC()
testutil.WaitForClient(t, s2.RPC, c.NodeID())
testutil.WaitForClient(t, s2.RPC, c.NodeID(), c.Region())
testutil.WaitForResult(func() (bool, error) {
nodes := s2.connectedNodes()
return len(nodes) == 1, nil

View File

@ -112,12 +112,17 @@ func WaitForLeader(t testing.TB, rpc rpcFn) {
}
// WaitForClient blocks until the client can be found
func WaitForClient(t testing.TB, rpc rpcFn, nodeID string) {
func WaitForClient(t testing.TB, rpc rpcFn, nodeID string, region string) {
t.Helper()
if region == "" {
region = "global"
}
WaitForResult(func() (bool, error) {
req := structs.NodeSpecificRequest{
NodeID: nodeID,
QueryOptions: structs.QueryOptions{Region: "global"},
QueryOptions: structs.QueryOptions{Region: region},
}
var out structs.SingleNodeResponse
@ -132,6 +137,8 @@ func WaitForClient(t testing.TB, rpc rpcFn, nodeID string) {
}, func(err error) {
t.Fatalf("failed to find node: %v", err)
})
t.Logf("[TEST] Client for test %s ready, id: %s, region: %s", t.Name(), nodeID, region)
}
// WaitForVotingMembers blocks until autopilot promotes all server peers