From 9e869c52fa5dacca7e1d4a35db11d45b65fcf9a5 Mon Sep 17 00:00:00 2001 From: davidadeleon <56207066+davidadeleon@users.noreply.github.com> Date: Fri, 6 May 2022 16:04:08 -0400 Subject: [PATCH] Add DR Metric scraping capability to debug command (#15316) * Add server information as well as ability to collect metrics from DR secondary * Update debug docs Adding additional information around ability to gather metrics from DR secondary * Fix broken link in updated doc * Create 15316.txt Create changelog entry * Fix Formatting * Update website/content/docs/commands/debug.mdx Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com> * Update changelog/15316.txt Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com> * Trigger Build Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com> --- changelog/15316.txt | 3 +++ command/debug.go | 35 ++++++++++++++----------- website/content/docs/commands/debug.mdx | 3 +++ 3 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 changelog/15316.txt diff --git a/changelog/15316.txt b/changelog/15316.txt new file mode 100644 index 000000000..d87190049 --- /dev/null +++ b/changelog/15316.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli/debug: added support for retrieving metrics from DR clusters if `unauthenticated_metrics_access` is enabled +``` diff --git a/command/debug.go b/command/debug.go index 838bd4037..e46aa2cef 100644 --- a/command/debug.go +++ b/command/debug.go @@ -58,6 +58,7 @@ type debugIndex struct { Version int `json:"version"` VaultAddress string `json:"vault_address"` ClientVersion string `json:"client_version"` + ServerVersion string `json:"server_version"` Timestamp time.Time `json:"timestamp"` DurationSeconds int `json:"duration_seconds"` IntervalSeconds int `json:"interval_seconds"` @@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int { c.UI.Output("==> Starting debug capture...") c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress)) c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion)) + c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion)) c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration)) c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval)) c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval)) @@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) { if err != nil { return "", fmt.Errorf("unable to create client to connect to Vault: %s", err) } - if _, err := client.Sys().Health(); err != nil { + serverHealth, err := client.Sys().Health() + if err != nil { return "", fmt.Errorf("unable to connect to the server: %s", err) } + + // Check if server is DR Secondary and we need to further + // ignore any targets due to endpoint restrictions + if serverHealth.ReplicationDRMode == "secondary" { + invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true) + if len(invalidDRTargets) != 0 { + c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", "))) + c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true) + } + } c.cachedClient = client captureTime := time.Now().UTC() @@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) { c.debugIndex = &debugIndex{ VaultAddress: client.Address(), ClientVersion: version.GetVersion().VersionNumber(), + ServerVersion: serverHealth.Version, Compress: c.flagCompress, DurationSeconds: int(c.flagDuration.Seconds()), IntervalSeconds: int(c.flagInterval.Seconds()), @@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string { return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"} } +func (c *DebugCommand) validDRSecondaryTargets() []string { + return []string{"metrics", "replication-status", "server-status"} +} + func (c *DebugCommand) captureStaticTargets() error { // Capture configuration state if strutil.StrListContains(c.flagTargets, "config") { @@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) { c.logger.Info("capturing metrics", "count", idxCount) idxCount++ - healthStatus, err := c.cachedClient.Sys().Health() - if err != nil { - c.captureError("metrics", err) - continue - } - - // Check replication status. We skip on processing metrics if we're one - // a DR node, though non-perf standbys will fail if they aren't using - // unauthenticated_metrics_access. - switch { - case healthStatus.ReplicationDRMode == "secondary": - c.logger.Info("skipping metrics capture on DR secondary node") - continue - } - // Perform metrics request r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics") resp, err := c.cachedClient.RawRequestWithContext(ctx, r) diff --git a/website/content/docs/commands/debug.mdx b/website/content/docs/commands/debug.mdx index 13c34bb38..f82460d24 100644 --- a/website/content/docs/commands/debug.mdx +++ b/website/content/docs/commands/debug.mdx @@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded. Additionally, host information is not available on the OpenBSD platform due to library limitations in fetching the data without enabling `cgo`. +[Enterprise] Telemetry can be gathered from a DR Secondary active node via the +`metrics` target if [unauthenticated_metrics_access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled. + ## Output Layout The output of the bundled information, once decompressed, is contained within a