Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).

changelog/15316.txt: adds a release note for the cli/debug improvement.

command/debug.go:
  1. debugIndex gains a ServerVersion field. preflight fills it from the Version
     of the client.Sys().Health() response and Run prints it next to the client
     version.
  2. preflight now keeps the health response. When its ReplicationDRMode is
     "secondary", every requested target that is not returned by
     validDRSecondaryTargets() (metrics, replication-status, server-status) is
     reported through c.UI.Info and removed from c.flagTargets.
  3. collectMetrics no longer calls Health on each iteration and no longer skips
     the /v1/sys/metrics request on a DR secondary.

website/content/docs/commands/debug.mdx: documents that the metrics target can
be used against a DR Secondary active node when unauthenticated_metrics_access
is enabled.

NOTE(review): line breaks and leading indentation below were rebuilt from the
hunk line counts and Go nesting. Spacing inside each line is exactly as
received and may have been collapsed before this file was captured, so confirm
against the target tree before applying.

diff --git a/changelog/15316.txt b/changelog/15316.txt
new file mode 100644
index 000000000..d87190049
--- /dev/null
+++ b/changelog/15316.txt
@@ -0,0 +1,3 @@
+```release-note:improvement
+cli/debug: added support for retrieving metrics from DR clusters if `unauthenticated_metrics_access` is enabled
+```
diff --git a/command/debug.go b/command/debug.go
index 838bd4037..e46aa2cef 100644
--- a/command/debug.go
+++ b/command/debug.go
@@ -58,6 +58,7 @@ type debugIndex struct {
 	Version int `json:"version"`
 	VaultAddress string `json:"vault_address"`
 	ClientVersion string `json:"client_version"`
+	ServerVersion string `json:"server_version"`
 	Timestamp time.Time `json:"timestamp"`
 	DurationSeconds int `json:"duration_seconds"`
 	IntervalSeconds int `json:"interval_seconds"`
@@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int {
 	c.UI.Output("==> Starting debug capture...")
 	c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
 	c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
+	c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion))
 	c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
 	c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
 	c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
@@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
 	if err != nil {
 		return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
 	}
-	if _, err := client.Sys().Health(); err != nil {
+	serverHealth, err := client.Sys().Health()
+	if err != nil {
 		return "", fmt.Errorf("unable to connect to the server: %s", err)
 	}
+
+	// Check if server is DR Secondary and we need to further
+	// ignore any targets due to endpoint restrictions
+	if serverHealth.ReplicationDRMode == "secondary" {
+		invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true)
+		if len(invalidDRTargets) != 0 {
+			c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", ")))
+			c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true)
+		}
+	}
 	c.cachedClient = client
 
 	captureTime := time.Now().UTC()
@@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
 	c.debugIndex = &debugIndex{
 		VaultAddress: client.Address(),
 		ClientVersion: version.GetVersion().VersionNumber(),
+		ServerVersion: serverHealth.Version,
 		Compress: c.flagCompress,
 		DurationSeconds: int(c.flagDuration.Seconds()),
 		IntervalSeconds: int(c.flagInterval.Seconds()),
@@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string {
 	return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
 }
 
+func (c *DebugCommand) validDRSecondaryTargets() []string {
+	return []string{"metrics", "replication-status", "server-status"}
+}
+
 func (c *DebugCommand) captureStaticTargets() error {
 	// Capture configuration state
 	if strutil.StrListContains(c.flagTargets, "config") {
@@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) {
 		c.logger.Info("capturing metrics", "count", idxCount)
 		idxCount++
 
-		healthStatus, err := c.cachedClient.Sys().Health()
-		if err != nil {
-			c.captureError("metrics", err)
-			continue
-		}
-
-		// Check replication status. We skip on processing metrics if we're one
-		// a DR node, though non-perf standbys will fail if they aren't using
-		// unauthenticated_metrics_access.
-		switch {
-		case healthStatus.ReplicationDRMode == "secondary":
-			c.logger.Info("skipping metrics capture on DR secondary node")
-			continue
-		}
-
 		// Perform metrics request
 		r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
 		resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
diff --git a/website/content/docs/commands/debug.mdx b/website/content/docs/commands/debug.mdx
index 13c34bb38..f82460d24 100644
--- a/website/content/docs/commands/debug.mdx
+++ b/website/content/docs/commands/debug.mdx
@@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded.
 Additionally, host information is not available on the OpenBSD platform due to
 library limitations in fetching the data without enabling `cgo`.
 
+[Enterprise] Telemetry can be gathered from a DR Secondary active node via the
+`metrics` target if [unauthenticated_metrics_access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled.
+
 ## Output Layout
 
 The output of the bundled information, once decompressed, is contained within a