Add DR Metric scraping capability to debug command (#15316)
* Add server information as well as ability to collect metrics from DR secondary
* Update debug docs
  Adding additional information around ability to gather metrics from DR secondary
* Fix broken link in updated doc
* Create 15316.txt
  Create changelog entry
* Fix Formatting
* Update website/content/docs/commands/debug.mdx
  Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
* Update changelog/15316.txt
  Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
* Trigger Build
Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
This commit is contained in:
parent
4be45db85b
commit
9e869c52fa
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:improvement
|
||||||
|
cli/debug: added support for retrieving metrics from DR clusters if `unauthenticated_metrics_access` is enabled
|
||||||
|
```
|
|
@ -58,6 +58,7 @@ type debugIndex struct {
|
||||||
Version int `json:"version"`
|
Version int `json:"version"`
|
||||||
VaultAddress string `json:"vault_address"`
|
VaultAddress string `json:"vault_address"`
|
||||||
ClientVersion string `json:"client_version"`
|
ClientVersion string `json:"client_version"`
|
||||||
|
ServerVersion string `json:"server_version"`
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
DurationSeconds int `json:"duration_seconds"`
|
DurationSeconds int `json:"duration_seconds"`
|
||||||
IntervalSeconds int `json:"interval_seconds"`
|
IntervalSeconds int `json:"interval_seconds"`
|
||||||
|
@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int {
|
||||||
c.UI.Output("==> Starting debug capture...")
|
c.UI.Output("==> Starting debug capture...")
|
||||||
c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
|
c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
|
||||||
c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
|
c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
|
||||||
|
c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion))
|
||||||
c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
|
c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
|
||||||
c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
|
c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
|
||||||
c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
|
c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
|
||||||
|
@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
|
return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
|
||||||
}
|
}
|
||||||
if _, err := client.Sys().Health(); err != nil {
|
serverHealth, err := client.Sys().Health()
|
||||||
|
if err != nil {
|
||||||
return "", fmt.Errorf("unable to connect to the server: %s", err)
|
return "", fmt.Errorf("unable to connect to the server: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if server is DR Secondary and we need to further
|
||||||
|
// ignore any targets due to endpoint restrictions
|
||||||
|
if serverHealth.ReplicationDRMode == "secondary" {
|
||||||
|
invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true)
|
||||||
|
if len(invalidDRTargets) != 0 {
|
||||||
|
c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", ")))
|
||||||
|
c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true)
|
||||||
|
}
|
||||||
|
}
|
||||||
c.cachedClient = client
|
c.cachedClient = client
|
||||||
|
|
||||||
captureTime := time.Now().UTC()
|
captureTime := time.Now().UTC()
|
||||||
|
@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
|
||||||
c.debugIndex = &debugIndex{
|
c.debugIndex = &debugIndex{
|
||||||
VaultAddress: client.Address(),
|
VaultAddress: client.Address(),
|
||||||
ClientVersion: version.GetVersion().VersionNumber(),
|
ClientVersion: version.GetVersion().VersionNumber(),
|
||||||
|
ServerVersion: serverHealth.Version,
|
||||||
Compress: c.flagCompress,
|
Compress: c.flagCompress,
|
||||||
DurationSeconds: int(c.flagDuration.Seconds()),
|
DurationSeconds: int(c.flagDuration.Seconds()),
|
||||||
IntervalSeconds: int(c.flagInterval.Seconds()),
|
IntervalSeconds: int(c.flagInterval.Seconds()),
|
||||||
|
@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string {
|
||||||
return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
|
return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *DebugCommand) validDRSecondaryTargets() []string {
|
||||||
|
return []string{"metrics", "replication-status", "server-status"}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *DebugCommand) captureStaticTargets() error {
|
func (c *DebugCommand) captureStaticTargets() error {
|
||||||
// Capture configuration state
|
// Capture configuration state
|
||||||
if strutil.StrListContains(c.flagTargets, "config") {
|
if strutil.StrListContains(c.flagTargets, "config") {
|
||||||
|
@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) {
|
||||||
c.logger.Info("capturing metrics", "count", idxCount)
|
c.logger.Info("capturing metrics", "count", idxCount)
|
||||||
idxCount++
|
idxCount++
|
||||||
|
|
||||||
healthStatus, err := c.cachedClient.Sys().Health()
|
|
||||||
if err != nil {
|
|
||||||
c.captureError("metrics", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check replication status. We skip on processing metrics if we're one
|
|
||||||
// a DR node, though non-perf standbys will fail if they aren't using
|
|
||||||
// unauthenticated_metrics_access.
|
|
||||||
switch {
|
|
||||||
case healthStatus.ReplicationDRMode == "secondary":
|
|
||||||
c.logger.Info("skipping metrics capture on DR secondary node")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Perform metrics request
|
// Perform metrics request
|
||||||
r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
|
r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
|
||||||
resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
|
resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
|
||||||
|
|
|
@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded.
|
||||||
Additionally, host information is not available on the OpenBSD platform due to
|
Additionally, host information is not available on the OpenBSD platform due to
|
||||||
library limitations in fetching the data without enabling `cgo`.
|
library limitations in fetching the data without enabling `cgo`.
|
||||||
|
|
||||||
|
[Enterprise] Telemetry can be gathered from a DR Secondary active node via the
|
||||||
|
`metrics` target if [unauthenticated_metrics_access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled.
|
||||||
|
|
||||||
## Output Layout
|
## Output Layout
|
||||||
|
|
||||||
The output of the bundled information, once decompressed, is contained within a
|
The output of the bundled information, once decompressed, is contained within a
|
||||||
|
|
Loading…
Reference in New Issue