cli: rename paths in debug bundle for clarity (#11307)

* Rename folders to reflect purpose
* Improve captured files test coverage
* Rename CSI plugins output file
* Add changelog entry
* fix test and make changelog message more explicit

Co-authored-by: Luiz Aoqui <luiz@hashicorp.com>
This commit is contained in:
Dave May 2021-10-13 18:00:55 -04:00 committed by GitHub
parent fa4df28fcd
commit c37a6ed583
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 120 additions and 56 deletions

3
.changelog/11307.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:breaking-change
cli: Renamed folders in `nomad operator debug` bundle for clarity
```

View File

@@ -49,7 +49,11 @@ type OperatorDebugCommand struct {
}
const (
userAgent = "nomad operator debug"
userAgent = "nomad operator debug"
clusterDir = "cluster"
clientDir = "client"
serverDir = "server"
intervalDir = "interval"
)
func (c *OperatorDebugCommand) Help() string {
@@ -458,7 +462,7 @@ func (c *OperatorDebugCommand) Run(args []string) int {
}
// Write complete list of server members to file
c.writeJSON("version", "members.json", members, err)
c.writeJSON(clusterDir, "members.json", members, err)
// Filter for servers matching criteria
c.serverIDs, err = filterServerMembers(members, serverIDs, c.region)
@@ -538,18 +542,17 @@ func (c *OperatorDebugCommand) Run(args []string) int {
// collect collects data from our endpoints and writes the archive bundle
func (c *OperatorDebugCommand) collect(client *api.Client) error {
// Version contains cluster meta information
dir := "version"
// Collect cluster data
self, err := client.Agent().Self()
c.writeJSON(dir, "agent-self.json", self, err)
c.writeJSON(clusterDir, "agent-self.json", self, err)
var qo *api.QueryOptions
namespaces, _, err := client.Namespaces().List(qo)
c.writeJSON(dir, "namespaces.json", namespaces, err)
c.writeJSON(clusterDir, "namespaces.json", namespaces, err)
regions, err := client.Regions().List()
c.writeJSON(dir, "regions.json", regions, err)
c.writeJSON(clusterDir, "regions.json", regions, err)
// Fetch data directly from consul and vault. Ignore errors
var consul, vault string
@@ -582,8 +585,8 @@ func (c *OperatorDebugCommand) collect(client *api.Client) error {
}
}
c.collectConsul(dir, consul)
c.collectVault(dir, vault)
c.collectConsul(clusterDir, consul)
c.collectVault(clusterDir, vault)
c.collectAgentHosts(client)
c.collectPprofs(client)
@@ -616,11 +619,11 @@ func (c *OperatorDebugCommand) mkdir(paths ...string) error {
// startMonitors starts go routines for each node and client
func (c *OperatorDebugCommand) startMonitors(client *api.Client) {
for _, id := range c.nodeIDs {
go c.startMonitor("client", "node_id", id, client)
go c.startMonitor(clientDir, "node_id", id, client)
}
for _, id := range c.serverIDs {
go c.startMonitor("server", "server_id", id, client)
go c.startMonitor(serverDir, "server_id", id, client)
}
}
@@ -664,11 +667,11 @@ func (c *OperatorDebugCommand) startMonitor(path, idKey, nodeID string, client *
// collectAgentHosts calls collectAgentHost for each selected node
func (c *OperatorDebugCommand) collectAgentHosts(client *api.Client) {
for _, n := range c.nodeIDs {
c.collectAgentHost("client", n, client)
c.collectAgentHost(clientDir, n, client)
}
for _, n := range c.serverIDs {
c.collectAgentHost("server", n, client)
c.collectAgentHost(serverDir, n, client)
}
}
@@ -676,7 +679,7 @@ func (c *OperatorDebugCommand) collectAgentHosts(client *api.Client) {
func (c *OperatorDebugCommand) collectAgentHost(path, id string, client *api.Client) {
var host *api.HostDataResponse
var err error
if path == "server" {
if path == serverDir {
host, err = client.Agent().Host(id, "", nil)
} else {
host, err = client.Agent().Host("", id, nil)
@@ -699,11 +702,11 @@ func (c *OperatorDebugCommand) collectAgentHost(path, id string, client *api.Cli
// collectPprofs captures the /agent/pprof for each listed node
func (c *OperatorDebugCommand) collectPprofs(client *api.Client) {
for _, n := range c.nodeIDs {
c.collectPprof("client", n, client)
c.collectPprof(clientDir, n, client)
}
for _, n := range c.serverIDs {
c.collectPprof("server", n, client)
c.collectPprof(serverDir, n, client)
}
}
@@ -711,7 +714,7 @@ func (c *OperatorDebugCommand) collectPprofs(client *api.Client) {
func (c *OperatorDebugCommand) collectPprof(path, id string, client *api.Client) {
pprofDurationSeconds := int(c.pprofDuration.Seconds())
opts := api.PprofOptions{Seconds: pprofDurationSeconds}
if path == "server" {
if path == serverDir {
opts.ServerID = id
} else {
opts.NodeID = id
@@ -810,7 +813,7 @@ func (c *OperatorDebugCommand) collectPeriodic(client *api.Client) {
case <-interval:
name = fmt.Sprintf("%04d", intervalCount)
dir = filepath.Join("nomad", name)
dir = filepath.Join(intervalDir, name)
c.Ui.Output(fmt.Sprintf(" Capture interval %s", name))
c.collectNomad(dir, client)
c.collectOperator(dir, client)
@@ -859,7 +862,7 @@ func (c *OperatorDebugCommand) collectNomad(dir string, client *api.Client) erro
// CSI Plugins - /v1/plugins?type=csi
ps, _, err := client.CSIPlugins().List(qo)
c.writeJSON(dir, "plugins.json", ps, err)
c.writeJSON(dir, "csi-plugins.json", ps, err)
// CSI Plugin details - /v1/plugin/csi/:plugin_id
for _, p := range ps {

View File

@@ -346,68 +346,126 @@ func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
var pluginFiles []string
for _, pluginName := range cases {
pluginFile := fmt.Sprintf("csi-plugin-id-%s.json", helper.CleanFilename(pluginName, "_"))
pluginFile = filepath.Join(path, "nomad", "0000", pluginFile)
pluginFile = filepath.Join(path, intervalDir, "0000", pluginFile)
pluginFiles = append(pluginFiles, pluginFile)
}
testutil.WaitForFiles(t, pluginFiles)
}
// buildPathSlice joins each file name in files onto the base path and
// returns the resulting slice of full paths. An empty (non-nil) slice is
// returned when files is empty.
func buildPathSlice(path string, files []string) []string {
	paths := make([]string, 0, len(files))
	for _, f := range files {
		paths = append(paths, filepath.Join(path, f))
	}
	return paths
}
func TestDebug_CapturedFiles(t *testing.T) {
srv, _, url := testServer(t, false, nil)
srv, _, url := testServer(t, true, nil)
testutil.WaitForLeader(t, srv.Agent.RPC)
serverNodeName := srv.Config.NodeName
region := srv.Config.Region
serverName := fmt.Sprintf("%s.%s", serverNodeName, region)
clientID := srv.Agent.Client().NodeID()
t.Logf("serverName: %s, clientID, %s", serverName, clientID)
// Setup file slices
clusterFiles := []string{
"agent-self.json",
"consul-agent-members.json",
"consul-agent-self.json",
"members.json",
"namespaces.json",
"regions.json",
"vault-sys-health.json",
}
pprofFiles := []string{
"allocs.prof",
"goroutine-debug1.txt",
"goroutine-debug2.txt",
"goroutine.prof",
"heap.prof",
"profile.prof",
"threadcreate.prof",
"trace.prof",
}
clientFiles := []string{
"agent-host.json",
"monitor.log",
}
clientFiles = append(clientFiles, pprofFiles...)
serverFiles := []string{
"agent-host.json",
"monitor.log",
}
serverFiles = append(serverFiles, pprofFiles...)
intervalFiles := []string{
"allocations.json",
"csi-plugins.json",
"csi-volumes.json",
"deployments.json",
"evaluations.json",
"jobs.json",
"license.json",
"metrics.json",
"nodes.json",
"operator-autopilot-health.json",
"operator-raft.json",
"operator-scheduler.json",
}
ui := cli.NewMockUi()
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
code := cmd.Run([]string{
"-address", url,
"-output", os.TempDir(),
"-server-id", "leader",
"-server-id", serverName,
"-node-id", clientID,
"-duration", "1300ms",
"-interval", "600ms",
})
// Get capture directory
path := cmd.collectDir
defer os.Remove(path)
// There should be no errors
require.Empty(t, ui.ErrorWriter.String())
require.Equal(t, 0, code)
ui.ErrorWriter.Reset()
serverFiles := []string{
// Version is always captured
filepath.Join(path, "version", "agent-self.json"),
// Verify cluster files
clusterPaths := buildPathSlice(cmd.path(clusterDir), clusterFiles)
t.Logf("Waiting for cluster files in path: %s", clusterDir)
testutil.WaitForFilesUntil(t, clusterPaths, 2*time.Minute)
// Consul and Vault contain results or errors
filepath.Join(path, "version", "consul-agent-self.json"),
filepath.Join(path, "version", "vault-sys-health.json"),
// Verify client files
clientPaths := buildPathSlice(cmd.path(clientDir, clientID), clientFiles)
t.Logf("Waiting for client files in path: %s", clientDir)
testutil.WaitForFilesUntil(t, clientPaths, 2*time.Minute)
// Monitor files are only created when selected
filepath.Join(path, "server", "leader", "monitor.log"),
// Verify server files
serverPaths := buildPathSlice(cmd.path(serverDir, serverName), serverFiles)
t.Logf("Waiting for server files in path: %s", serverDir)
testutil.WaitForFilesUntil(t, serverPaths, 2*time.Minute)
// Pprof profiles
filepath.Join(path, "server", "leader", "profile.prof"),
filepath.Join(path, "server", "leader", "trace.prof"),
filepath.Join(path, "server", "leader", "goroutine.prof"),
filepath.Join(path, "server", "leader", "goroutine-debug1.txt"),
filepath.Join(path, "server", "leader", "goroutine-debug2.txt"),
filepath.Join(path, "server", "leader", "heap.prof"),
filepath.Join(path, "server", "leader", "allocs.prof"),
filepath.Join(path, "server", "leader", "threadcreate.prof"),
// Verify interval 0000 files
intervalPaths0 := buildPathSlice(cmd.path(intervalDir, "0000"), intervalFiles)
t.Logf("Waiting for interval 0000 files in path: %s", intervalDir)
testutil.WaitForFilesUntil(t, intervalPaths0, 2*time.Minute)
// Multiple snapshots are collected, 00 is always created
filepath.Join(path, "nomad", "0000", "jobs.json"),
filepath.Join(path, "nomad", "0000", "nodes.json"),
filepath.Join(path, "nomad", "0000", "metrics.json"),
// Multiple snapshots are collected, 01 requires two intervals
filepath.Join(path, "nomad", "0001", "jobs.json"),
filepath.Join(path, "nomad", "0001", "nodes.json"),
filepath.Join(path, "nomad", "0001", "metrics.json"),
}
testutil.WaitForFilesUntil(t, serverFiles, 2*time.Minute)
// Verify interval 0001 files
intervalPaths1 := buildPathSlice(cmd.path(intervalDir, "0001"), intervalFiles)
t.Logf("Waiting for interval 0001 files in path: %s", intervalDir)
testutil.WaitForFilesUntil(t, intervalPaths1, 2*time.Minute)
}
func TestDebug_ExistingOutput(t *testing.T) {

View File

@@ -241,7 +241,7 @@ func WaitForRunning(t testing.TB, rpc rpcFn, job *structs.Job) []*structs.AllocListStub
// WaitForFiles blocks until all the files in the slice are present
func WaitForFiles(t testing.TB, files []string) {
WaitForResult(func() (bool, error) {
return FilesExist(files), nil
return FilesExist(files)
}, func(err error) {
t.Fatalf("missing expected files: %v", err)
})
@@ -250,18 +250,18 @@ func WaitForFiles(t testing.TB, files []string) {
// WaitForFilesUntil blocks until duration or all the files in the slice are present
func WaitForFilesUntil(t testing.TB, files []string, until time.Duration) {
WaitForResultUntil(until, func() (bool, error) {
return FilesExist(files), nil
return FilesExist(files)
}, func(err error) {
t.Fatalf("missing expected files: %v", err)
})
}
// FilesExist verifies all files in the slice are present
func FilesExist(files []string) bool {
func FilesExist(files []string) (bool, error) {
for _, f := range files {
if _, err := os.Stat(f); os.IsNotExist(err) {
return false
return false, fmt.Errorf("expected file not found: %v", f)
}
}
return true
return true, nil
}