Add DR Metric scraping capability to debug command (#15316)
* Add server information as well as ability to collect metrics from DR secondary
* Update debug docs
  Adding additional information around ability to gather metrics from DR secondary
* Fix broken link in updated doc
* Create 15316.txt
  Create changelog entry
* Fix Formatting
* Update website/content/docs/commands/debug.mdx
  Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
* Update changelog/15316.txt
  Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
* Trigger Build
  Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
This commit is contained in:
parent
4be45db85b
commit
9e869c52fa
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
cli/debug: added support for retrieving metrics from DR clusters if `unauthenticated_metrics_access` is enabled
|
||||
```
|
|
@ -58,6 +58,7 @@ type debugIndex struct {
|
|||
Version int `json:"version"`
|
||||
VaultAddress string `json:"vault_address"`
|
||||
ClientVersion string `json:"client_version"`
|
||||
ServerVersion string `json:"server_version"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
DurationSeconds int `json:"duration_seconds"`
|
||||
IntervalSeconds int `json:"interval_seconds"`
|
||||
|
@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int {
|
|||
c.UI.Output("==> Starting debug capture...")
|
||||
c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
|
||||
c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
|
||||
c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion))
|
||||
c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
|
||||
c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
|
||||
c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
|
||||
|
@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
|
|||
if err != nil {
|
||||
return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
|
||||
}
|
||||
if _, err := client.Sys().Health(); err != nil {
|
||||
serverHealth, err := client.Sys().Health()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to connect to the server: %s", err)
|
||||
}
|
||||
|
||||
// If the server is a DR secondary, ignore any requested targets
|
||||
// that are unavailable there due to endpoint restrictions
|
||||
if serverHealth.ReplicationDRMode == "secondary" {
|
||||
invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true)
|
||||
if len(invalidDRTargets) != 0 {
|
||||
c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", ")))
|
||||
c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true)
|
||||
}
|
||||
}
|
||||
c.cachedClient = client
|
||||
|
||||
captureTime := time.Now().UTC()
|
||||
|
@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
|
|||
c.debugIndex = &debugIndex{
|
||||
VaultAddress: client.Address(),
|
||||
ClientVersion: version.GetVersion().VersionNumber(),
|
||||
ServerVersion: serverHealth.Version,
|
||||
Compress: c.flagCompress,
|
||||
DurationSeconds: int(c.flagDuration.Seconds()),
|
||||
IntervalSeconds: int(c.flagInterval.Seconds()),
|
||||
|
@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string {
|
|||
return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
|
||||
}
|
||||
|
||||
// validDRSecondaryTargets returns the capture targets that are kept when the
// server's health response reports a replication DR mode of "secondary";
// preflight drops every other requested target in that case.
func (c *DebugCommand) validDRSecondaryTargets() []string {
|
||||
return []string{"metrics", "replication-status", "server-status"}
|
||||
}
|
||||
|
||||
func (c *DebugCommand) captureStaticTargets() error {
|
||||
// Capture configuration state
|
||||
if strutil.StrListContains(c.flagTargets, "config") {
|
||||
|
@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) {
|
|||
c.logger.Info("capturing metrics", "count", idxCount)
|
||||
idxCount++
|
||||
|
||||
healthStatus, err := c.cachedClient.Sys().Health()
|
||||
if err != nil {
|
||||
c.captureError("metrics", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check replication status. We skip processing metrics if we're on
|
||||
// a DR node, though non-perf standbys will fail if they aren't using
|
||||
// unauthenticated_metrics_access.
|
||||
switch {
|
||||
case healthStatus.ReplicationDRMode == "secondary":
|
||||
c.logger.Info("skipping metrics capture on DR secondary node")
|
||||
continue
|
||||
}
|
||||
|
||||
// Perform metrics request
|
||||
r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
|
||||
resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
|
||||
|
|
|
@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded.
|
|||
Additionally, host information is not available on the OpenBSD platform due to
|
||||
library limitations in fetching the data without enabling `cgo`.
|
||||
|
||||
[Enterprise] Telemetry can be gathered from a DR Secondary active node via the
|
||||
`metrics` target if [unauthenticated_metrics_access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled.
|
||||
|
||||
## Output Layout
|
||||
|
||||
The output of the bundled information, once decompressed, is contained within a
|
||||
|
|
Loading…
Reference in New Issue