Add auto-tidy, last-tidy, and leaf cert health checks (#17901)

* Add enable_auto_tidy health check

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Add tidy_last_run health check

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Add too_many_certs health check

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Add tidy, CRL, cert count checks to CLI

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Cache stored leaf cert count

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

* Correctly parse last run

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
This commit is contained in:
Alexander Scheel 2022-11-18 11:04:58 -05:00 committed by GitHub
parent 9543067ffe
commit 8461f096e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 414 additions and 0 deletions

View file

@ -198,6 +198,7 @@ func pkiFetchLeaves(e *Executor, versionError func()) (bool, *PathFetch, []strin
leaves = append(leaves, rawSerial.(string))
}
leavesRet.ParsedCache["leaves"] = leaves
leavesRet.ParsedCache["count"] = len(leaves)
}
return false, leavesRet, leavesRet.ParsedCache["leaves"].([]string), nil

View file

@ -0,0 +1,185 @@
package healthcheck
import (
"fmt"
"time"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/go-secure-stdlib/parseutil"
)
// EnableAutoTidy is a health check that reads a mount's auto-tidy
// configuration and reports when auto-tidy is disabled or when its
// interval_duration / pause_duration meet or exceed configured thresholds.
type EnableAutoTidy struct {
// Enabled is loaded from the "enabled" config key and returned by IsEnabled.
Enabled bool
// UnsupportedVersion is set by FetchResources when the server reports the
// config/auto-tidy path as unsupported.
UnsupportedVersion bool
// IntervalDurationCritical is the interval_duration at or above which a
// critical result is reported.
IntervalDurationCritical time.Duration
// IntervalDurationWarning is the interval_duration at or above which a
// warning result is reported.
IntervalDurationWarning time.Duration
// PauseDurationCritical is the pause_duration at or above which a critical
// result is reported.
PauseDurationCritical time.Duration
// PauseDurationWarning is the pause_duration at or above which a warning
// result is reported.
PauseDurationWarning time.Duration
// TidyConfig holds the fetch result for /{{mount}}/config/auto-tidy.
TidyConfig *PathFetch
}
// NewEnableAutoTidyCheck returns a zero-valued EnableAutoTidy check; its
// thresholds are populated later by LoadConfig.
func NewEnableAutoTidyCheck() Check {
	return new(EnableAutoTidy)
}
// Name returns the identifier of this health check.
func (h *EnableAutoTidy) Name() string {
	const checkName = "enable_auto_tidy"
	return checkName
}
// IsEnabled reports the value of the Enabled field, as set by LoadConfig.
func (h *EnableAutoTidy) IsEnabled() bool {
	enabled := h.Enabled
	return enabled
}
// DefaultConfig returns the default thresholds for this check, expressed as
// duration strings keyed by parameter name.
func (h *EnableAutoTidy) DefaultConfig() map[string]interface{} {
	defaults := make(map[string]interface{}, 4)
	defaults["interval_duration_critical"] = "7d"
	defaults["interval_duration_warning"] = "2d"
	defaults["pause_duration_critical"] = "1s"
	defaults["pause_duration_warning"] = "200ms"
	return defaults
}
// fromConfig parses config[param] as a duration. On failure it returns a zero
// duration and an error naming the check, the parameter, and the raw value.
func (h *EnableAutoTidy) fromConfig(config map[string]interface{}, param string) (time.Duration, error) {
	raw := config[param]

	parsed, parseErr := parseutil.ParseDurationSecond(raw)
	if parseErr == nil {
		return parsed, nil
	}

	return time.Duration(0), fmt.Errorf("failed to parse parameter %v.%v=%v: %w", h.Name(), param, raw, parseErr)
}
// LoadConfig populates the duration thresholds and the Enabled flag from
// config. Parameters are parsed in a fixed order and the first failure is
// returned; Enabled is only updated when every parameter parsed cleanly.
func (h *EnableAutoTidy) LoadConfig(config map[string]interface{}) error {
	thresholds := []struct {
		param string
		dest  *time.Duration
	}{
		{"interval_duration_critical", &h.IntervalDurationCritical},
		{"interval_duration_warning", &h.IntervalDurationWarning},
		{"pause_duration_critical", &h.PauseDurationCritical},
		{"pause_duration_warning", &h.PauseDurationWarning},
	}

	for _, t := range thresholds {
		var parseErr error
		// Assign before checking so the destination receives fromConfig's
		// returned value even when parsing fails.
		if *t.dest, parseErr = h.fromConfig(config, t.param); parseErr != nil {
			return parseErr
		}
	}

	flag, flagErr := parseutil.ParseBool(config["enabled"])
	if flagErr != nil {
		return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), flagErr)
	}

	h.Enabled = flag
	return nil
}
// FetchResources reads the mount's auto-tidy configuration through the
// executor and records whether the server reported the path as unsupported.
func (h *EnableAutoTidy) FetchResources(e *Executor) error {
	fetched, fetchErr := e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/config/auto-tidy")
	// Store the fetch result unconditionally, including on error.
	h.TidyConfig = fetched
	if fetchErr != nil {
		return fetchErr
	}

	if fetched.IsUnsupportedPathError() {
		h.UnsupportedVersion = true
	}

	return nil
}
// Evaluate inspects the fetched auto-tidy configuration and reports on it.
//
// Results:
//   - ResultInvalidVersion when the server does not support config/auto-tidy.
//   - ResultInsufficientPermissions when the configuration could not be read.
//   - ResultInformational when auto-tidy is disabled.
//   - ResultCritical or ResultWarning for each of interval_duration and
//     pause_duration that meets or exceeds its configured threshold.
//
// No results are returned when nothing was fetched, or when auto-tidy is
// enabled and both durations are below their warning thresholds. An error is
// returned when the server's response is missing or cannot be parsed.
func (h *EnableAutoTidy) Evaluate(e *Executor) (results []*Result, err error) {
	const endpoint = "/{{mount}}/config/auto-tidy"

	if h.UnsupportedVersion {
		ret := Result{
			Status:   ResultInvalidVersion,
			Endpoint: endpoint,
			Message:  "This health check requires Vault 1.12+, but an earlier version of Vault Server was contacted, preventing this health check from running.",
		}
		return []*Result{&ret}, nil
	}

	if h.TidyConfig == nil {
		return nil, nil
	}

	if h.TidyConfig.IsSecretPermissionsError() {
		ret := Result{
			Status:   ResultInsufficientPermissions,
			Endpoint: endpoint,
			Message:  "This prevents the health check from functioning at all, as it cannot determine whether auto-tidy is enabled.",
		}
		if e.Client.Token() == "" {
			ret.Message = "No token available so unable to read the authenticated auto-tidy configuration for this mount. " + ret.Message
		} else {
			ret.Message = "This token lacks permission to read the auto-tidy configuration for this mount. " + ret.Message
		}
		return []*Result{&ret}, nil
	}

	// Guard against a response without a body rather than panicking on a nil
	// dereference below.
	if h.TidyConfig.Secret == nil {
		return nil, fmt.Errorf("error parsing API response from server for %v: no data returned", endpoint)
	}
	data := h.TidyConfig.Secret.Data

	// Checked assertion: a missing or non-boolean value becomes an error
	// instead of a panic.
	isEnabled, ok := data["enabled"].(bool)
	if !ok {
		return nil, fmt.Errorf("error parsing API response from server for enabled: expected bool, got %T", data["enabled"])
	}

	// Both durations are parsed before the enabled check so malformed
	// responses are reported even when auto-tidy is disabled.
	intervalDuration, err := parseutil.ParseDurationSecond(data["interval_duration"])
	if err != nil {
		return nil, fmt.Errorf("error parsing API response from server for interval_duration: %w", err)
	}

	pauseDuration, err := parseutil.ParseDurationSecond(data["pause_duration"])
	if err != nil {
		return nil, fmt.Errorf("error parsing API response from server for pause_duration: %w", err)
	}

	if !isEnabled {
		ret := Result{
			Status:   ResultInformational,
			Endpoint: endpoint,
			Message:  "Auto-tidy is currently disabled; consider enabling auto-tidy to execute tidy operations periodically. This helps the health and performance of a mount.",
		}
		return append(results, &ret), nil
	}

	baseMsg := "Auto-tidy is configured with too long of a value for %v (%v); this could impact performance as tidies run too infrequently or take too long to execute."

	// checkThreshold appends at most one result for param: critical takes
	// precedence over warning, and values under both thresholds add nothing.
	checkThreshold := func(param string, value, critical, warning time.Duration) {
		ret := Result{Endpoint: endpoint}
		switch {
		case value >= critical:
			ret.Status = ResultCritical
		case value >= warning:
			ret.Status = ResultWarning
		default:
			return
		}
		ret.Message = fmt.Sprintf(baseMsg, param, value)
		results = append(results, &ret)
	}

	checkThreshold("interval_duration", intervalDuration, h.IntervalDurationCritical, h.IntervalDurationWarning)
	checkThreshold("pause_duration", pauseDuration, h.PauseDurationCritical, h.PauseDurationWarning)

	return results, nil
}

View file

@ -0,0 +1,124 @@
package healthcheck
import (
"fmt"
"time"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/go-secure-stdlib/parseutil"
)
// TidyLastRun is a health check that reads a mount's tidy-status and reports
// when the last finished tidy is older than the configured thresholds.
type TidyLastRun struct {
// Enabled is loaded from the "enabled" config key and returned by IsEnabled.
Enabled bool
// UnsupportedVersion is set by FetchResources when the server reports the
// tidy-status path as unsupported.
UnsupportedVersion bool
// LastRunCritical is the age of the last finished tidy beyond which a
// critical result is reported.
LastRunCritical time.Duration
// LastRunWarning is the age of the last finished tidy beyond which a warning
// result is reported.
LastRunWarning time.Duration
// TidyStatus holds the fetch result for /{{mount}}/tidy-status.
TidyStatus *PathFetch
}
// NewTidyLastRunCheck returns a zero-valued TidyLastRun check; its thresholds
// are populated later by LoadConfig.
func NewTidyLastRunCheck() Check {
	return new(TidyLastRun)
}
// Name returns the identifier of this health check.
func (h *TidyLastRun) Name() string {
	const checkName = "tidy_last_run"
	return checkName
}
// IsEnabled reports the value of the Enabled field, as set by LoadConfig.
func (h *TidyLastRun) IsEnabled() bool {
	enabled := h.Enabled
	return enabled
}
// DefaultConfig returns the default last-run thresholds for this check,
// expressed as duration strings keyed by parameter name.
func (h *TidyLastRun) DefaultConfig() map[string]interface{} {
	defaults := make(map[string]interface{}, 2)
	defaults["last_run_critical"] = "7d"
	defaults["last_run_warning"] = "2d"
	return defaults
}
// LoadConfig populates the last-run thresholds and the Enabled flag from
// config. The critical threshold is parsed first, then the warning threshold,
// then "enabled"; the first failure is returned and Enabled is only updated
// when everything parsed cleanly.
func (h *TidyLastRun) LoadConfig(config map[string]interface{}) error {
	const (
		criticalKey = "last_run_critical"
		warningKey  = "last_run_warning"
	)

	var parseErr error

	if h.LastRunCritical, parseErr = parseutil.ParseDurationSecond(config[criticalKey]); parseErr != nil {
		return fmt.Errorf("failed to parse parameter %v.%v=%v: %w", h.Name(), criticalKey, config[criticalKey], parseErr)
	}

	if h.LastRunWarning, parseErr = parseutil.ParseDurationSecond(config[warningKey]); parseErr != nil {
		return fmt.Errorf("failed to parse parameter %v.%v=%v: %w", h.Name(), warningKey, config[warningKey], parseErr)
	}

	flag, flagErr := parseutil.ParseBool(config["enabled"])
	if flagErr != nil {
		return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), flagErr)
	}

	h.Enabled = flag
	return nil
}
// FetchResources reads the mount's tidy-status through the executor and
// records whether the server reported the path as unsupported.
//
// A fetch failure is wrapped with %w so callers can still inspect the
// underlying error with errors.Is / errors.As.
func (h *TidyLastRun) FetchResources(e *Executor) error {
	var err error

	h.TidyStatus, err = e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/tidy-status")
	if err != nil {
		return fmt.Errorf("failed to fetch mount's tidy-status value: %w", err)
	}

	if h.TidyStatus.IsUnsupportedPathError() {
		h.UnsupportedVersion = true
	}

	return nil
}
// Evaluate reports how recently tidy last finished on the mount.
//
// Results:
//   - ResultInvalidVersion when the server does not support tidy-status.
//   - ResultInsufficientPermissions when tidy-status could not be read.
//   - ResultCritical when no time_finished is present, or when the last finish
//     is older than LastRunCritical.
//   - ResultWarning when the last finish is older than LastRunWarning.
//   - ResultOK otherwise.
//
// No results are returned when nothing was fetched. An error is returned when
// time_finished cannot be parsed.
func (h *TidyLastRun) Evaluate(e *Executor) (results []*Result, err error) {
	const endpoint = "/{{mount}}/tidy-status"

	if h.UnsupportedVersion {
		// Reached when the server reported tidy-status as an unsupported path.
		ret := Result{
			Status:   ResultInvalidVersion,
			Endpoint: endpoint,
			Message:  "This health check requires Vault 1.10+ but an earlier version of Vault Server was contacted, preventing this health check from running.",
		}
		return []*Result{&ret}, nil
	}

	// Nothing was fetched; avoid dereferencing a nil fetch result below.
	if h.TidyStatus == nil {
		return nil, nil
	}

	// Without this branch an unreadable tidy-status would fall through to the
	// "run recently" OK result below.
	if h.TidyStatus.IsSecretPermissionsError() {
		ret := Result{
			Status:   ResultInsufficientPermissions,
			Endpoint: endpoint,
			Message:  "This prevents the health check from functioning at all, as it cannot determine when tidy last ran.",
		}
		if e.Client.Token() == "" {
			ret.Message = "No token available so unable to read the authenticated tidy status for this mount. " + ret.Message
		} else {
			ret.Message = "This token lacks permission to read the tidy status for this mount. " + ret.Message
		}
		return []*Result{&ret}, nil
	}

	baseMsg := "Tidy hasn't run in the last %v; this can point to problems with the mount's auto-tidy configuration or an external tidy executor; this can impact PKI's and Vault's performance if not run regularly."

	ret := Result{
		Status:   ResultOK,
		Endpoint: endpoint,
		Message:  "Tidy has run recently on this mount.",
	}

	if h.TidyStatus.Secret != nil && h.TidyStatus.Secret.Data != nil {
		when := h.TidyStatus.Secret.Data["time_finished"]
		if when == nil {
			ret.Status = ResultCritical
			ret.Message = "Tidy hasn't run since this mount was created; this can point to problems with the mount's auto-tidy configuration or an external tidy executor; this can impact PKI's and Vault's performance if not run regularly. It is suggested to enable auto-tidy on this mount."
		} else {
			whenT, parseErr := parseutil.ParseAbsoluteTime(when)
			if parseErr != nil {
				return nil, fmt.Errorf("error parsing time value (%v): %w", when, parseErr)
			}

			// Cutoffs are relative to the time of evaluation.
			now := time.Now()
			criticalCutoff := now.Add(-1 * h.LastRunCritical)
			warningCutoff := now.Add(-1 * h.LastRunWarning)

			switch {
			case whenT.Before(criticalCutoff):
				ret.Status = ResultCritical
				ret.Message = fmt.Sprintf(baseMsg, h.LastRunCritical)
			case whenT.Before(warningCutoff):
				ret.Status = ResultWarning
				ret.Message = fmt.Sprintf(baseMsg, h.LastRunWarning)
			}
		}
	}

	return append(results, &ret), nil
}

View file

@ -0,0 +1,101 @@
package healthcheck
import (
"fmt"
"github.com/hashicorp/go-secure-stdlib/parseutil"
)
// TooManyCerts is a health check that reports when the number of stored leaf
// certificates on a mount meets or exceeds configured thresholds.
type TooManyCerts struct {
// Enabled is loaded from the "enabled" config key and returned by IsEnabled.
Enabled bool
// UnsupportedVersion is set through the callback that FetchResources passes
// to pkiFetchLeaves.
UnsupportedVersion bool
// CountCritical is the certificate count at or above which a critical result
// is reported.
CountCritical int
// CountWarning is the certificate count at or above which a warning result
// is reported.
CountWarning int
// CertCounts is the cached "count" value taken from the leaf fetch result.
CertCounts int
}
// NewTooManyCertsCheck returns a zero-valued TooManyCerts check; its
// thresholds are populated later by LoadConfig.
func NewTooManyCertsCheck() Check {
	return new(TooManyCerts)
}
// Name returns the identifier of this health check.
func (h *TooManyCerts) Name() string {
	const checkName = "too_many_certs"
	return checkName
}
// IsEnabled reports the value of the Enabled field, as set by LoadConfig.
func (h *TooManyCerts) IsEnabled() bool {
	enabled := h.Enabled
	return enabled
}
// DefaultConfig returns the default certificate-count thresholds for this
// check, keyed by parameter name.
func (h *TooManyCerts) DefaultConfig() map[string]interface{} {
	defaults := make(map[string]interface{}, 2)
	defaults["count_critical"] = 250000
	defaults["count_warning"] = 50000
	return defaults
}
// LoadConfig populates the count thresholds and the Enabled flag from config.
// Both counts are parsed with the bounds 1 and 15000000 passed to
// SafeParseIntRange. The first failure is returned; a threshold is only
// stored when its value parsed cleanly.
func (h *TooManyCerts) LoadConfig(config map[string]interface{}) error {
	const (
		lowerBound = 1
		upperBound = 15000000
	)

	critical, criticalErr := parseutil.SafeParseIntRange(config["count_critical"], lowerBound, upperBound)
	if criticalErr != nil {
		return fmt.Errorf("error parsing %v.count_critical: %w", h.Name(), criticalErr)
	}
	h.CountCritical = int(critical)

	warning, warningErr := parseutil.SafeParseIntRange(config["count_warning"], lowerBound, upperBound)
	if warningErr != nil {
		return fmt.Errorf("error parsing %v.count_warning: %w", h.Name(), warningErr)
	}
	h.CountWarning = int(warning)

	// Enabled is assigned directly from the parser's return value, so it is
	// written even when parsing fails.
	var enabledErr error
	if h.Enabled, enabledErr = parseutil.ParseBool(config["enabled"]); enabledErr != nil {
		return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), enabledErr)
	}

	return nil
}
// FetchResources obtains the leaf fetch result via pkiFetchLeaves and stores
// its cached "count" value. When pkiFetchLeaves signals an early exit, its
// error (possibly nil) is returned and the count is left untouched.
func (h *TooManyCerts) FetchResources(e *Executor) error {
	markUnsupported := func() {
		h.UnsupportedVersion = true
	}

	stop, fetched, _, fetchErr := pkiFetchLeaves(e, markUnsupported)
	if stop {
		return fetchErr
	}

	h.CertCounts = fetched.ParsedCache["count"].(int)
	return nil
}
// Evaluate compares the stored certificate count against the configured
// thresholds and returns exactly one result.
//
// Results:
//   - ResultInvalidVersion when the certificate listing is unsupported.
//   - ResultCritical when CertCounts >= CountCritical.
//   - ResultWarning when CertCounts >= CountWarning.
//   - ResultOK otherwise.
func (h *TooManyCerts) Evaluate(e *Executor) (results []*Result, err error) {
	const endpoint = "/{{mount}}/certs"

	if h.UnsupportedVersion {
		// Shouldn't happen; /certs has been around forever.
		ret := Result{
			Status:   ResultInvalidVersion,
			Endpoint: endpoint,
			Message:  "This health check requires Vault 1.11+ but an earlier version of Vault Server was contacted, preventing this health check from running.",
		}
		return []*Result{&ret}, nil
	}

	ret := Result{
		Status:   ResultOK,
		Endpoint: endpoint,
		Message:  "This mount has an OK number of stored certificates.",
	}

	// The advice is no_store=true: that is the role setting that stops issued
	// certificates from being written to storage.
	baseMsg := "This PKI mount has %v outstanding stored certificates; consider using no_store=true on roles, running tidy operations periodically, and using shorter certificate lifetimes to reduce the storage pressure on this mount."

	switch {
	case h.CertCounts >= h.CountCritical:
		ret.Status = ResultCritical
		ret.Message = fmt.Sprintf(baseMsg, h.CertCounts)
	case h.CertCounts >= h.CountWarning:
		ret.Status = ResultWarning
		ret.Message = fmt.Sprintf(baseMsg, h.CertCounts)
	}

	return append(results, &ret), nil
}

View file

@ -202,6 +202,9 @@ func (c *PKIHealthCheckCommand) Run(args []string) int {
executor.AddCheck(healthcheck.NewRoleAllowsLocalhostCheck())
executor.AddCheck(healthcheck.NewRoleAllowsGlobWildcardsCheck())
executor.AddCheck(healthcheck.NewRoleNoStoreFalseCheck())
executor.AddCheck(healthcheck.NewEnableAutoTidyCheck())
executor.AddCheck(healthcheck.NewTidyLastRunCheck())
executor.AddCheck(healthcheck.NewTooManyCertsCheck())
if c.flagDefaultDisabled {
executor.DefaultEnabled = false
}