From 2d14c54fa09f92a9ed7a38004c145dd36c84000e Mon Sep 17 00:00:00 2001 From: Dave May Date: Tue, 12 Oct 2021 16:58:41 -0400 Subject: [PATCH] debug: Improve namespace and region support (#11269) * Include region and namespace in CLI output * Add region and prefix matching for server members * Add namespace and region API outputs to cluster metadata folder * Add region awareness to WaitForClient helper function * Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice * Refactor test client agent generation * Add tests for region * Add changelog --- .changelog/11269.txt | 3 + command/operator_debug.go | 69 +++++++-- command/operator_debug_test.go | 209 ++++++++++++++++------------ command/util_test.go | 19 +++ helper/funcs.go | 20 +++ helper/funcs_test.go | 28 ++++ nomad/client_agent_endpoint_test.go | 2 +- testutil/wait.go | 11 +- 8 files changed, 258 insertions(+), 103 deletions(-) create mode 100644 .changelog/11269.txt diff --git a/.changelog/11269.txt b/.changelog/11269.txt new file mode 100644 index 000000000..59cbd29b4 --- /dev/null +++ b/.changelog/11269.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: Improve debug namespace and region support +``` diff --git a/command/operator_debug.go b/command/operator_debug.go index 41fdbe744..97385bf3a 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -329,7 +329,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { nodeLookupFailCount := 0 nodeCaptureCount := 0 - for _, id := range argNodes(nodeIDs) { + for _, id := range stringToSlice(nodeIDs) { if id == "all" { // Capture from all nodes using empty prefix filter id = "" @@ -382,15 +382,15 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.Ui.Error(fmt.Sprintf("Failed to retrieve server list; err: %v", err)) return 1 } + + // Write complete list of server members to file c.writeJSON("version", "members.json", members, err) - // We always write the error to the file, but don't range if no members found - if serverIDs == "all" && members != nil { - // Special case to capture from all servers - for _, member := range members.Members { - c.serverIDs = append(c.serverIDs, member.Name) - } - } else { - c.serverIDs = append(c.serverIDs, argNodes(serverIDs)...) + + // Filter for servers matching criteria + c.serverIDs, err = filterServerMembers(members, serverIDs, c.region) + if err != nil { + c.Ui.Error(fmt.Sprintf("Failed to parse server list; err: %v", err)) + return 1 } serversFound := 0 @@ -412,6 +412,8 @@ func (c *OperatorDebugCommand) Run(args []string) int { // Display general info about the capture c.Ui.Output("Starting debugger...") c.Ui.Output("") + c.Ui.Output(fmt.Sprintf(" Region: %s", c.region)) + c.Ui.Output(fmt.Sprintf(" Namespace: %s", c.namespace)) c.Ui.Output(fmt.Sprintf(" Servers: (%d/%d) %v", serverCaptureCount, serversFound, c.serverIDs)) c.Ui.Output(fmt.Sprintf(" Clients: (%d/%d) %v", nodeCaptureCount, nodesFound, c.nodeIDs)) if nodeCaptureCount > 0 && nodeCaptureCount == c.maxNodes { @@ -468,6 +470,13 @@ func (c *OperatorDebugCommand) collect(client *api.Client) error { self, err := client.Agent().Self() c.writeJSON(dir, "agent-self.json", self, err) + var qo *api.QueryOptions + namespaces, _, err := client.Namespaces().List(qo) + c.writeJSON(dir, "namespaces.json", namespaces, err) + + regions, err := client.Regions().List() + c.writeJSON(dir, "regions.json", regions, err) + // Fetch data directly from consul and vault. Ignore errors var consul, vault string @@ -1055,8 +1064,46 @@ func TarCZF(archive string, src, target string) error { }) } -// argNodes splits node ids from the command line by "," -func argNodes(input string) []string { +// filterServerMembers returns a slice of server member names matching the search criteria +func filterServerMembers(serverMembers *api.ServerMembers, serverIDs string, region string) (membersFound []string, err error) { + if serverMembers.Members == nil { + return nil, fmt.Errorf("Failed to parse server members, members==nil") + } + + prefixes := stringToSlice(serverIDs) + + // "leader" is a special case which Nomad handles in the API. If "leader" + // appears in serverIDs, add it to membersFound and remove it from the list + // so that it isn't processed by the range loop + if helper.SliceStringContains(prefixes, "leader") { + membersFound = append(membersFound, "leader") + helper.RemoveEqualFold(&prefixes, "leader") + } + + for _, member := range serverMembers.Members { + // If region is provided it must match exactly + if region != "" && member.Tags["region"] != region { + continue + } + + // Always include "all" + if serverIDs == "all" { + membersFound = append(membersFound, member.Name) + continue + } + + // Include member if name matches any prefix from serverIDs + if helper.StringHasPrefixInSlice(member.Name, prefixes) { + membersFound = append(membersFound, member.Name) + } + } + + return membersFound, nil +} + +// stringToSlice splits comma-separated input string into slice, trims +// whitespace, and prunes empty values +func stringToSlice(input string) []string { ns := strings.Split(input, ",") var out []string for _, n := range ns { diff --git a/command/operator_debug_test.go b/command/operator_debug_test.go index a5aa6f88d..92b5fdb60 100644 --- a/command/operator_debug_test.go +++ b/command/operator_debug_test.go @@ -50,11 +50,23 @@ func runTestCases(t *testing.T, cases testCases) { }) } } +func newClientAgentConfigFunc(region string, nodeClass string, srvRPCAddr string) func(*agent.Config) { + if region == "" { + region = "global" + } + + return func(c *agent.Config) { + c.Region = region + c.Client.NodeClass = nodeClass + c.Client.Servers = []string{srvRPCAddr} + c.Client.Enabled = true + c.Server.Enabled = false + } +} func TestDebug_NodeClass(t *testing.T) { // Start test server and API client srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() // Wait for leadership to establish testutil.WaitForLeader(t, srv.Agent.RPC) @@ -63,81 +75,34 @@ func TestDebug_NodeClass(t *testing.T) { srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) - // Setup client 1 (nodeclass = clienta) - agentConfFunc1 := func(c *agent.Config) { - c.Region = "global" - c.Server.Enabled = false - c.Client.NodeClass = "clienta" - c.Client.Enabled = true - c.Client.Servers = []string{srvRPCAddr} - } - - // Start client 1 - client1 := agent.NewTestAgent(t, "client1", agentConfFunc1) - defer client1.Shutdown() - - // Wait for client1 to connect - client1NodeID := client1.Agent.Client().NodeID() - testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID) - t.Logf("[TEST] Client1 ready, id: %s", client1NodeID) - - // Setup client 2 (nodeclass = clientb) - agentConfFunc2 := func(c *agent.Config) { - c.Region = "global" - c.Server.Enabled = false - c.Client.NodeClass = "clientb" - c.Client.Enabled = true - c.Client.Servers = []string{srvRPCAddr} - } - - // Start client 2 - client2 := agent.NewTestAgent(t, "client2", agentConfFunc2) - defer client2.Shutdown() - - // Wait for client2 to connect - client2NodeID := client2.Agent.Client().NodeID() - testutil.WaitForClient(t, srv.Agent.RPC, client2NodeID) - t.Logf("[TEST] Client2 ready, id: %s", client2NodeID) - - // Setup client 3 (nodeclass = clienta) - agentConfFunc3 := func(c *agent.Config) { - c.Server.Enabled = false - c.Client.NodeClass = "clienta" - c.Client.Servers = []string{srvRPCAddr} - } - - // Start client 3 - client3 := agent.NewTestAgent(t, "client3", agentConfFunc3) - defer client3.Shutdown() - - // Wait for client3 to connect - client3NodeID := client3.Agent.Client().NodeID() - testutil.WaitForClient(t, srv.Agent.RPC, client3NodeID) - t.Logf("[TEST] Client3 ready, id: %s", client3NodeID) + // Start test clients + testClient(t, "client1", newClientAgentConfigFunc("global", "classA", srvRPCAddr)) + testClient(t, "client2", newClientAgentConfigFunc("global", "classB", srvRPCAddr)) + testClient(t, "client3", newClientAgentConfigFunc("global", "classA", srvRPCAddr)) // Setup test cases cases := testCases{ { - name: "address=api, node-class=clienta, max-nodes=2", - args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"}, + name: "address=api, node-class=classA, max-nodes=2", + args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classA", "-max-nodes", "2"}, expectedCode: 0, expectedOutputs: []string{ "Servers: (1/1)", "Clients: (2/3)", "Max node count reached (2)", - "Node Class: clienta", + "Node Class: classA", "Created debug archive", }, expectedError: "", }, { - name: "address=api, node-class=clientb, max-nodes=2", - args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"}, + name: "address=api, node-class=classB, max-nodes=2", + args: []string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "classB", "-max-nodes", "2"}, expectedCode: 0, expectedOutputs: []string{ "Servers: (1/1)", "Clients: (1/3)", - "Node Class: clientb", + "Node Class: classB", "Created debug archive", }, expectedError: "", @@ -150,7 +115,6 @@ func TestDebug_NodeClass(t *testing.T) { func TestDebug_ClientToServer(t *testing.T) { // Start test server and API client srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() // Wait for leadership to establish testutil.WaitForLeader(t, srv.Agent.RPC) @@ -159,27 +123,12 @@ func TestDebug_ClientToServer(t *testing.T) { srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) - // Setup client 1 (nodeclass = clienta) - agentConfFunc1 := func(c *agent.Config) { - c.Region = "global" - c.Server.Enabled = false - c.Client.NodeClass = "clienta" - c.Client.Enabled = true - c.Client.Servers = []string{srvRPCAddr} - } - - // Start client 1 - client1 := agent.NewTestAgent(t, "client1", agentConfFunc1) - defer client1.Shutdown() - - // Wait for client 1 to connect - client1NodeID := client1.Agent.Client().NodeID() - testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID) - t.Logf("[TEST] Client1 ready, id: %s", client1NodeID) + // Start client + agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc("", "", srvRPCAddr)) // Get API addresses addrServer := srv.HTTPAddr() - addrClient1 := client1.HTTPAddr() + addrClient1 := agent1.HTTPAddr() t.Logf("[TEST] testAgent api address: %s", url) t.Logf("[TEST] Server api address: %s", addrServer) @@ -210,9 +159,83 @@ func TestDebug_ClientToServer(t *testing.T) { runTestCases(t, cases) } +func TestDebug_ClientToServer_Region(t *testing.T) { + region := "testregion" + + // Start test server and API client + srv, _, url := testServer(t, false, func(c *agent.Config) { + c.Region = region + }) + + // Wait for leadership to establish + testutil.WaitForLeader(t, srv.Agent.RPC) + + // Retrieve server RPC address to join client + srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC + t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) + + // Start client + agent1, _, _ := testClient(t, "client1", newClientAgentConfigFunc(region, "", srvRPCAddr)) + + // Get API addresses + addrServer := srv.HTTPAddr() + addrClient1 := agent1.HTTPAddr() + + t.Logf("[TEST] testAgent api address: %s", url) + t.Logf("[TEST] Server api address: %s", addrServer) + t.Logf("[TEST] Client1 api address: %s", addrClient1) + + // Setup test cases + var cases = testCases{ + // Good + { + name: "region - testAgent api server", + args: []string{"-address", url, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"}, + expectedCode: 0, + expectedOutputs: []string{ + "Region: " + region + "\n", + "Servers: (1/1)", + "Clients: (1/1)", + "Created debug archive", + }, + }, + { + name: "region - server address", + args: []string{"-address", addrServer, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"}, + expectedCode: 0, + expectedOutputs: []string{ + "Region: " + region + "\n", + "Servers: (1/1)", + "Clients: (1/1)", + "Created debug archive", + }, + }, + { + name: "region - client1 address - verify no SIGSEGV panic", + args: []string{"-address", addrClient1, "-region", region, "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"}, + expectedCode: 0, + expectedOutputs: []string{ + "Region: " + region + "\n", + "Servers: (1/1)", + "Clients: (1/1)", + "Created debug archive", + }, + }, + + // Bad + { + name: "invalid region - all servers, all clients", + args: []string{"-address", url, "-region", "never", "-duration", "250ms", "-interval", "250ms", "-server-id", "all", "-node-id", "all"}, + expectedCode: 1, + expectedError: "500 (No path to region)", + }, + } + + runTestCases(t, cases) +} + func TestDebug_SingleServer(t *testing.T) { srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() testutil.WaitForLeader(t, srv.Agent.RPC) var cases = testCases{ @@ -245,7 +268,6 @@ func TestDebug_SingleServer(t *testing.T) { func TestDebug_Failures(t *testing.T) { srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() testutil.WaitForLeader(t, srv.Agent.RPC) var cases = testCases{ @@ -293,7 +315,6 @@ func TestDebug_Failures(t *testing.T) { func TestDebug_Bad_CSIPlugin_Names(t *testing.T) { // Start test server and API client srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() // Wait for leadership to establish testutil.WaitForLeader(t, srv.Agent.RPC) @@ -334,7 +355,6 @@ func TestDebug_Bad_CSIPlugin_Names(t *testing.T) { func TestDebug_CapturedFiles(t *testing.T) { srv, _, url := testServer(t, false, nil) - defer srv.Shutdown() testutil.WaitForLeader(t, srv.Agent.RPC) ui := cli.NewMockUi() @@ -413,7 +433,6 @@ func TestDebug_Fail_Pprof(t *testing.T) { // Start test server and API client srv, _, url := testServer(t, false, agentConfFunc) - defer srv.Shutdown() // Wait for leadership to establish testutil.WaitForLeader(t, srv.Agent.RPC) @@ -422,7 +441,7 @@ func TestDebug_Fail_Pprof(t *testing.T) { ui := cli.NewMockUi() cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} - // Debug on client - node class = "clienta" + // Debug on server with endpoints disabled code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-interval", "250ms", "-server-id", "all"}) assert.Equal(t, 0, code) // Pprof failure isn't fatal @@ -432,15 +451,27 @@ func TestDebug_Fail_Pprof(t *testing.T) { require.Contains(t, ui.OutputWriter.String(), "Created debug archive") // Archive should be generated anyway } -func TestDebug_Utils(t *testing.T) { +func TestDebug_StringToSlice(t *testing.T) { t.Parallel() - xs := argNodes("foo, bar") - require.Equal(t, []string{"foo", "bar"}, xs) + cases := []struct { + input string + expected []string + }{ + {input: ",,", expected: []string(nil)}, + {input: "", expected: []string(nil)}, + {input: "foo, bar", expected: []string{"foo", "bar"}}, + {input: " foo, bar ", expected: []string{"foo", "bar"}}, + {input: "foo,,bar", expected: []string{"foo", "bar"}}, + } + for _, tc := range cases { + out := stringToSlice(tc.input) + require.Equal(t, tc.expected, out) + } +} - xs = argNodes("") - require.Len(t, xs, 0) - require.Empty(t, xs) +func TestDebug_External(t *testing.T) { + t.Parallel() // address calculation honors CONSUL_HTTP_SSL // ssl: true - Correct alignment diff --git a/command/util_test.go b/command/util_test.go index e1d961614..d1fd96a45 100644 --- a/command/util_test.go +++ b/command/util_test.go @@ -6,6 +6,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/command/agent" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/testutil" ) func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) { @@ -23,6 +24,24 @@ func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.Te return a, c, a.HTTPAddr() } +// testClient starts a new test client, blocks until it joins, and performs +// cleanup after the test is complete. +func testClient(t *testing.T, name string, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) { + t.Logf("[TEST] Starting client agent %s", name) + a := agent.NewTestAgent(t, name, func(config *agent.Config) { + if cb != nil { + cb(config) + } + }) + t.Cleanup(func() { a.Shutdown() }) + + c := a.Client() + t.Logf("[TEST] Waiting for client %s to join server(s) %s", name, a.GetConfig().Client.Servers) + testutil.WaitForClient(t, a.Agent.RPC, a.Agent.Client().NodeID(), a.Agent.Client().Region()) + + return a, c, a.HTTPAddr() +} + func testJob(jobID string) *api.Job { task := api.NewTask("task1", "mock_driver"). SetConfig("kill_after", "1s"). diff --git a/helper/funcs.go b/helper/funcs.go index 690ee3f7c..f55266819 100644 --- a/helper/funcs.go +++ b/helper/funcs.go @@ -197,6 +197,26 @@ func SliceStringContains(list []string, item string) bool { return false } +// SliceStringHasPrefix returns true if any string in list starts with prefix +func SliceStringHasPrefix(list []string, prefix string) bool { + for _, s := range list { + if strings.HasPrefix(s, prefix) { + return true + } + } + return false +} + +// StringHasPrefixInSlice returns true if string starts with any prefix in list +func StringHasPrefixInSlice(s string, prefixes []string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(s, prefix) { + return true + } + } + return false +} + func SliceSetDisjoint(first, second []string) (bool, []string) { contained := make(map[string]struct{}, len(first)) for _, k := range first { diff --git a/helper/funcs_test.go b/helper/funcs_test.go index d6ec74b91..265e3f479 100644 --- a/helper/funcs_test.go +++ b/helper/funcs_test.go @@ -34,6 +34,34 @@ func TestSliceStringContains(t *testing.T) { require.False(t, SliceStringContains(list, "d")) } +func TestSliceStringHasPrefix(t *testing.T) { + list := []string{"alpha", "bravo", "charlie", "definitely", "most definitely"} + // At least one string in the slice above starts with the following test prefix strings + require.True(t, SliceStringHasPrefix(list, "a")) + require.True(t, SliceStringHasPrefix(list, "b")) + require.True(t, SliceStringHasPrefix(list, "c")) + require.True(t, SliceStringHasPrefix(list, "d")) + require.True(t, SliceStringHasPrefix(list, "mos")) + require.True(t, SliceStringHasPrefix(list, "def")) + require.False(t, SliceStringHasPrefix(list, "delta")) + +} + +func TestStringHasPrefixInSlice(t *testing.T) { + prefixes := []string{"a", "b", "c", "definitely", "most definitely"} + // The following strings all start with at least one prefix in the slice above + require.True(t, StringHasPrefixInSlice("alpha", prefixes)) + require.True(t, StringHasPrefixInSlice("bravo", prefixes)) + require.True(t, StringHasPrefixInSlice("charlie", prefixes)) + require.True(t, StringHasPrefixInSlice("definitely", prefixes)) + require.True(t, StringHasPrefixInSlice("most definitely", prefixes)) + + require.False(t, StringHasPrefixInSlice("mos", prefixes)) + require.False(t, StringHasPrefixInSlice("def", prefixes)) + require.False(t, StringHasPrefixInSlice("delta", prefixes)) + +} + func TestCompareTimePtrs(t *testing.T) { t.Run("nil", func(t *testing.T) { a := (*time.Duration)(nil) diff --git a/nomad/client_agent_endpoint_test.go b/nomad/client_agent_endpoint_test.go index 2f737ebff..e3c11ee83 100644 --- a/nomad/client_agent_endpoint_test.go +++ b/nomad/client_agent_endpoint_test.go @@ -548,7 +548,7 @@ func TestAgentProfile_RemoteClient(t *testing.T) { }) defer cleanupC() - testutil.WaitForClient(t, s2.RPC, c.NodeID()) + testutil.WaitForClient(t, s2.RPC, c.NodeID(), c.Region()) testutil.WaitForResult(func() (bool, error) { nodes := s2.connectedNodes() return len(nodes) == 1, nil diff --git a/testutil/wait.go b/testutil/wait.go index 19215d0ce..fee5b6b05 100644 --- a/testutil/wait.go +++ b/testutil/wait.go @@ -112,12 +112,17 @@ func WaitForLeader(t testing.TB, rpc rpcFn) { } // WaitForClient blocks until the client can be found -func WaitForClient(t testing.TB, rpc rpcFn, nodeID string) { +func WaitForClient(t testing.TB, rpc rpcFn, nodeID string, region string) { + t.Helper() + + if region == "" { + region = "global" + } WaitForResult(func() (bool, error) { req := structs.NodeSpecificRequest{ NodeID: nodeID, - QueryOptions: structs.QueryOptions{Region: "global"}, + QueryOptions: structs.QueryOptions{Region: region}, } var out structs.SingleNodeResponse @@ -132,6 +137,8 @@ func WaitForClient(t testing.TB, rpc rpcFn, nodeID string) { }, func(err error) { t.Fatalf("failed to find node: %v", err) }) + + t.Logf("[TEST] Client for test %s ready, id: %s, region: %s", t.Name(), nodeID, region) } // WaitForVotingMembers blocks until autopilot promotes all server peers