PKI Health Check Command (#17750)

* Stub out initial health check command This command will be used to generate health check results for the PKI engine. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Start common health check implementation Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add common health check utilities These utilities will collect helpers not specific to PKI health checks, such as formatting longer durations more legibly. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add PKI health check common utils Many health checks will need issuer and/or CRL information in order to execute. We've centrally located these helpers to avoid particular health checks from needing to reimplement them each time. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Adding ca_validity_period health check Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Begin using health-checks in PKI command Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Allow parsing raw requests afterwards This shifts the last of the logic difference between Read(...) and ReadRaw(...) to a new helper, allowing ReadRaw(...) requests to be parsed into the same response structure afterwards as Read(...); this allows API callers to fetch the raw secret and inspect the raw response object in case something went wrong (error code &c) -- and when the request succeeds, they can still get the api.Secret out. This will be used with the PKI health check functionality, making both LIST and READ operations use ReadRaw, and optionally parsing the secret afterwards. 
Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add crl_validity_period health check Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add tests for PKI health check Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Fix bug in raw reading with contexts When reading raw objects, don't manually call the context cancellation: this causes timeouts and/or EOF errors when attempting to read or parse the response body. See message in client.RawRequestWithContext(...) for more information. This was causing the test suite to randomly fail, due to the context cancelling. The test suite's client usually had a default timeout, whereas the CLI didn't, and thus didn't exhibit the same issue. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add changelog Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Fix typo in permissions message Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Move %v->%w for errs Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
2022-11-16 09:27:56 -05:00 · 2022-11-16 09:27:56 -05:00 · 5101e31857
parent ab395f2c79
commit 5101e31857
10 changed files with 1273 additions and 15 deletions
--- a/api/logical.go
+++ b/api/logical.go
@ -66,6 +66,27 @@ func (c *Logical) ReadWithDataWithContext(ctx context.Context, path string, data
 	defer cancelFunc()

 	resp, err := c.readRawWithDataWithContext(ctx, path, data)
+	return c.ParseRawResponseAndCloseBody(resp, err)
+}
+
+func (c *Logical) ReadRaw(path string) (*Response, error) {
+	return c.ReadRawWithData(path, nil)
+}
+
+// ReadRawWithData is ReadRawWithDataWithContext invoked with a background
+// context.
+func (c *Logical) ReadRawWithData(path string, data map[string][]string) (*Response, error) {
+	ctx := context.Background()
+	return c.ReadRawWithDataWithContext(ctx, path, data)
+}
+
+// ReadRawWithDataWithContext wraps ctx with the client's
+// withConfiguredTimeout helper and performs the raw read, returning the
+// response as-is rather than parsing it into a Secret.
+//
+// The cancel function returned by withConfiguredTimeout is intentionally
+// discarded: cancelling here would interfere with the caller reading the
+// response body afterwards.
+func (c *Logical) ReadRawWithDataWithContext(ctx context.Context, path string, data map[string][]string) (*Response, error) {
+	// See note in client.go, RawRequestWithContext for why we do not call
+	// Cancel here. The difference between these two methods is that the
+	// former takes a Request object directly, whereas this one builds one
+	// up for the caller.
+	ctx, _ = c.c.withConfiguredTimeout(ctx)
+	return c.readRawWithDataWithContext(ctx, path, data)
+}
+
+func (c *Logical) ParseRawResponseAndCloseBody(resp *Response, err error) (*Secret, error) {
 	if resp != nil {
 		defer resp.Body.Close()
 	}
@ -90,21 +111,6 @@ func (c *Logical) ReadWithDataWithContext(ctx context.Context, path string, data
 	return ParseSecret(resp.Body)
 }

-func (c *Logical) ReadRaw(path string) (*Response, error) {
-	return c.ReadRawWithData(path, nil)
-}
-
-func (c *Logical) ReadRawWithData(path string, data map[string][]string) (*Response, error) {
-	return c.ReadRawWithDataWithContext(context.Background(), path, data)
-}
-
-func (c *Logical) ReadRawWithDataWithContext(ctx context.Context, path string, data map[string][]string) (*Response, error) {
-	ctx, cancelFunc := c.c.withConfiguredTimeout(ctx)
-	defer cancelFunc()
-
-	return c.readRawWithDataWithContext(ctx, path, data)
-}
-
 func (c *Logical) readRawWithDataWithContext(ctx context.Context, path string, data map[string][]string) (*Response, error) {
 	r := c.c.NewRequest(http.MethodGet, "/v1/"+path)

--- a/changelog/17750.txt
+++ b/changelog/17750.txt
@ -0,0 +1,3 @@
+```release-note:improvement
+cli/pki: Add health-check subcommand to evaluate the health of a PKI instance.
+```
--- a/command/commands.go
+++ b/command/commands.go
@ -787,6 +787,11 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) {
 				ShutdownCh:  MakeShutdownCh(),
 			}, nil
 		},
+		"pki health-check": func() (cli.Command, error) {
+			return &PKIHealthCheckCommand{
+				BaseCommand: getBaseCommand(),
+			}, nil
+		},
 	}

 	// Disabled by default until functional
--- a/command/healthcheck/common.go
+++ b/command/healthcheck/common.go
@ -0,0 +1,276 @@
+/*
+ * The healthcheck package attempts to allow generic checks of arbitrary
+ * engines, while providing a common framework with some performance
+ * efficiencies in mind.
+ *
+ * The core of this package is the Executor context; a caller would
+ * provision a set of checks, an API client, and a configuration,
+ * which the executor would use to decide which checks to execute
+ * and how.
+ *
+ * Checks are based around a series of remote paths that are fetched by
+ * the client; these are broken into two categories: static paths, which
+ * can always be fetched; and dynamic paths, which the check fetches based
+ * on earlier results.
+ *
+ * For instance, a basic PKI CA lifetime check will have a static fetch against
+ * the list of CAs, and a dynamic fetch, using that earlier list, to fetch the
+ * PEMs of all CAs.
+ *
+ * This allows health checks to share data: many PKI checks will need the
+ * issuer list and so repeatedly fetching this may result in a performance
+ * impact.
+ */
+
+package healthcheck
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/hashicorp/vault/api"
+	"github.com/hashicorp/vault/sdk/logical"
+)
+
+// Executor holds everything needed to run a set of health checks against
+// one mount.
+//
+//   - Client is the API client used for all fetches.
+//   - Mount replaces the "{{mount}}" placeholder in templated paths.
+//   - DefaultEnabled is applied by BuildConfig to checks whose default
+//     configuration carries no "enabled" key, and forces checks off when it
+//     is false.
+//   - Config stores the merged configuration per check name.
+//   - Resources caches fetch results by templated path, then by operation.
+//   - Checkers is the list of registered checks, in registration order.
+type Executor struct {
+	Client         *api.Client
+	Mount          string
+	DefaultEnabled bool
+
+	Config map[string]map[string]interface{}
+
+	Resources map[string]map[logical.Operation]*PathFetch
+
+	Checkers []Check
+}
+
+// NewExecutor returns an Executor bound to client and mount, with checks
+// enabled by default and empty configuration and resource caches.
+func NewExecutor(client *api.Client, mount string) *Executor {
+	exec := new(Executor)
+	exec.Client = client
+	exec.Mount = mount
+	exec.DefaultEnabled = true
+	exec.Config = make(map[string]map[string]interface{})
+	exec.Resources = make(map[string]map[logical.Operation]*PathFetch)
+	return exec
+}
+
+// AddCheck registers c at the end of the executor's list of checks.
+func (e *Executor) AddCheck(c Check) {
+	registered := append(e.Checkers, c)
+	e.Checkers = registered
+}
+
+// BuildConfig merges every registered check's default configuration with
+// the caller-supplied external configuration, loads the merged result into
+// the check, and records it in e.Config under the check's name.
+//
+// external maps a check name to a map of option name to value. An error is
+// returned when a check has an empty or duplicate name, when the external
+// entry for a check is not a map, when it names an option absent from the
+// check's defaults, or when the check rejects the merged configuration.
+func (e *Executor) BuildConfig(external map[string]interface{}) error {
+	merged := e.Config
+
+	for index, checker := range e.Checkers {
+		name := checker.Name()
+		if _, present := merged[name]; name == "" || present {
+			return fmt.Errorf("bad checker %v: name is empty or already present: %v", index, name)
+		}
+
+		// Tolerate checks without any defaults; writing to a nil map would
+		// otherwise panic below.
+		config := checker.DefaultConfig()
+		if config == nil {
+			config = make(map[string]interface{})
+		}
+
+		// Fill in the enabled status when the check did not supply one, and
+		// force it off when checks are disabled by default. This avoids
+		// asserting the type of whatever value the check supplied.
+		if _, present := config["enabled"]; !present || !e.DefaultEnabled {
+			config["enabled"] = e.DefaultEnabled
+		}
+
+		// Now apply any external config for this check.
+		if rawConfig, present := external[name]; present {
+			econfig, ok := rawConfig.(map[string]interface{})
+			if !ok {
+				return fmt.Errorf("bad configuration for %v: expected a map of options but got %T", name, rawConfig)
+			}
+
+			for param, evalue := range econfig {
+				if _, known := config[param]; !known {
+					// Assumption: default configs have all possible
+					// configuration options. This external config has
+					// an unknown option, so we want to error out.
+					return fmt.Errorf("unknown configuration option for %v: %v", name, param)
+				}
+
+				config[param] = evalue
+			}
+		}
+
+		// Now apply it and save it.
+		if err := checker.LoadConfig(config); err != nil {
+			return fmt.Errorf("error saving merged config for %v: %w", name, err)
+		}
+		merged[name] = config
+	}
+
+	return nil
+}
+
+// Execute runs every enabled check in registration order: it lets the check
+// fetch its resources, evaluates it, and then fills in each result's
+// templated endpoint and display status. Results are keyed by check name.
+// The first error from any check aborts the run.
+func (e *Executor) Execute() (map[string][]*Result, error) {
+	allResults := make(map[string][]*Result)
+
+	for i := range e.Checkers {
+		check := e.Checkers[i]
+		if enabled := check.IsEnabled(); !enabled {
+			continue
+		}
+
+		err := check.FetchResources(e)
+		if err != nil {
+			return nil, err
+		}
+
+		checkResults, err := check.Evaluate(e)
+		if err != nil {
+			return nil, err
+		}
+
+		for j := range checkResults {
+			r := checkResults[j]
+			r.StatusDisplay = ResultStatusNameMap[r.Status]
+			r.Endpoint = e.templatePath(r.Endpoint)
+		}
+
+		allResults[check.Name()] = checkResults
+	}
+
+	return allResults, nil
+}
+
+func (e *Executor) templatePath(path string) string {
+	return strings.ReplaceAll(path, "{{mount}}", e.Mount)
+}
+
+// FetchIfNotFetched returns the cached fetch for (op, rawPath) when one
+// exists; otherwise it performs the request, caches the outcome (including
+// failures), and returns it. rawPath may contain the "{{mount}}"
+// placeholder. Only read and list operations are supported. The returned
+// error is the fetch's surfaced error, if any.
+func (e *Executor) FetchIfNotFetched(op logical.Operation, rawPath string) (*PathFetch, error) {
+	path := e.templatePath(rawPath)
+
+	// Indexing a missing (nil) per-path map simply yields no hit.
+	cached := e.Resources[path]
+	if hit := cached[op]; hit != nil {
+		return hit, hit.FetchSurfaceError()
+	}
+
+	// Must not exist in cache; make sure the per-path map exists.
+	if cached == nil {
+		cached = make(map[logical.Operation]*PathFetch)
+		e.Resources[path] = cached
+	}
+
+	query := make(map[string][]string)
+	switch op {
+	case logical.ListOperation:
+		query["list"] = []string{"true"}
+	case logical.ReadOperation:
+		// No extra query parameters for plain reads.
+	default:
+		return nil, fmt.Errorf("unknown operation: %v on %v", op, path)
+	}
+
+	fetch := &PathFetch{
+		Operation:   op,
+		Path:        path,
+		ParsedCache: make(map[string]interface{}),
+	}
+
+	resp, err := e.Client.Logical().ReadRawWithData(path, query)
+	fetch.Response = resp
+	if err != nil {
+		fetch.FetchError = err
+	} else {
+		// Not all secrets will parse correctly. Sometimes we really want
+		// to fetch a raw endpoint, sometimes we're run with a bad mount
+		// or missing permissions.
+		parsed, parseErr := e.Client.Logical().ParseRawResponseAndCloseBody(resp, err)
+		if parseErr == nil {
+			fetch.Secret = parsed
+		} else {
+			fetch.SecretParseError = parseErr
+		}
+	}
+
+	cached[op] = fetch
+	return fetch, fetch.FetchSurfaceError()
+}
+
+// PathFetch records the outcome of fetching one path with one operation.
+//
+// Response and FetchError hold what the raw read returned. Secret and
+// SecretParseError hold the result of parsing that response, which is only
+// attempted when the read itself reported no error. ParsedCache lets
+// callers stash values they derived from the fetch so they are computed
+// once.
+type PathFetch struct {
+	Operation        logical.Operation
+	Path             string
+	Response         *api.Response
+	FetchError       error
+	Secret           *api.Secret
+	SecretParseError error
+	ParsedCache      map[string]interface{}
+}
+
+// IsOK reports whether the fetch produced a response without an error.
+func (p *PathFetch) IsOK() bool {
+	if p.FetchError != nil {
+		return false
+	}
+	return p.Response != nil
+}
+
+// IsSecretOK reports whether the fetch succeeded and its response parsed
+// into a non-nil secret.
+func (p *PathFetch) IsSecretOK() bool {
+	if !p.IsOK() || p.SecretParseError != nil {
+		return false
+	}
+	return p.Secret != nil
+}
+
+// FetchSurfaceError returns the error, if any, that should abort the
+// caller. Successful fetches and the tolerated failure classes (permission
+// denied, unsupported path, missing resource) yield nil so that checks can
+// report them as results instead. A "route entry not found" failure is
+// wrapped with a hint about a bad mount.
+func (p *PathFetch) FetchSurfaceError() error {
+	if p.IsOK() || p.IsSecretPermissionsError() || p.IsUnsupportedPathError() || p.IsMissingResource() {
+		return nil
+	}
+
+	if p.FetchError == nil {
+		// Not OK without a recorded error means no response was stored;
+		// report that rather than dereferencing a nil error.
+		return fmt.Errorf("no response received and no error recorded\n\nOperation: %v\nPath: %v", p.Operation, p.Path)
+	}
+
+	if strings.Contains(p.FetchError.Error(), "route entry not found") {
+		return fmt.Errorf("Error making API request: was a bad mount given?\n\nOperation: %v\nPath: %v\nOriginal Error:\n%w", p.Operation, p.Path, p.FetchError)
+	}
+
+	return p.FetchError
+}
+
+// fetchErrorContains reports whether a fetch error was recorded and its
+// text contains fragment. A recorded error already implies the fetch is not
+// OK, so no separate IsOK check is needed.
+func (p *PathFetch) fetchErrorContains(fragment string) bool {
+	return p.FetchError != nil && strings.Contains(p.FetchError.Error(), fragment)
+}
+
+// IsSecretPermissionsError reports whether the fetch failed with a
+// "permission denied" error.
+func (p *PathFetch) IsSecretPermissionsError() bool {
+	return p.fetchErrorContains("permission denied")
+}
+
+// IsUnsupportedPathError reports whether the fetch failed with an
+// "unsupported path" error.
+func (p *PathFetch) IsUnsupportedPathError() bool {
+	return p.fetchErrorContains("unsupported path")
+}
+
+// IsMissingResource reports whether the fetch failed with an
+// "unable to find" error.
+func (p *PathFetch) IsMissingResource() bool {
+	return p.fetchErrorContains("unable to find")
+}
+
+// Check is a single health check driven by an Executor.
+//
+// The Executor calls Name and DefaultConfig while building configuration,
+// hands the merged configuration to LoadConfig, and, for checks whose
+// IsEnabled returns true, calls FetchResources followed by Evaluate.
+type Check interface {
+	Name() string
+	IsEnabled() bool
+
+	DefaultConfig() map[string]interface{}
+	LoadConfig(config map[string]interface{}) error
+
+	FetchResources(e *Executor) error
+
+	Evaluate(e *Executor) ([]*Result, error)
+}
+
+// ResultStatus is the numeric outcome of a health check result. The values
+// are assigned by iota, so the declaration order below fixes each numeric
+// code (it is serialized as "status_code" in Result).
+type ResultStatus int
+
+const (
+	ResultNotApplicable ResultStatus = iota
+	ResultOK
+	ResultInformational
+	ResultWarning
+	ResultCritical
+	ResultInvalidVersion
+	ResultInsufficientPermissions
+)
+
+// ResultStatusNameMap maps each ResultStatus to its display name; Execute
+// uses it to fill in Result.StatusDisplay.
+var ResultStatusNameMap = map[ResultStatus]string{
+	ResultNotApplicable:           "not_applicable",
+	ResultOK:                      "ok",
+	ResultInformational:           "informational",
+	ResultWarning:                 "warning",
+	ResultCritical:                "critical",
+	ResultInvalidVersion:          "invalid_version",
+	ResultInsufficientPermissions: "insufficient_permissions",
+}
+
+// NameResultStatusMap is the inverse of ResultStatusNameMap: it maps a
+// display name back to its ResultStatus.
+var NameResultStatusMap = map[string]ResultStatus{
+	"not_applicable":           ResultNotApplicable,
+	"ok":                       ResultOK,
+	"informational":            ResultInformational,
+	"warning":                  ResultWarning,
+	"critical":                 ResultCritical,
+	"invalid_version":          ResultInvalidVersion,
+	"insufficient_permissions": ResultInsufficientPermissions,
+}
+
+// Result is one finding produced by a check. Checks set Status, Endpoint
+// (which may contain the "{{mount}}" placeholder) and Message; Execute then
+// templates Endpoint and derives StatusDisplay from Status.
+type Result struct {
+	Status        ResultStatus `json:"status_code"`
+	StatusDisplay string       `json:"status"`
+	Endpoint      string       `json:"endpoint,omitempty"`
+	Message       string       `json:"message,omitempty"`
+}
--- a/command/healthcheck/pki.go
+++ b/command/healthcheck/pki.go
@ -0,0 +1,128 @@
+package healthcheck
+
+import (
+	"crypto/x509"
+	"encoding/pem"
+	"fmt"
+
+	"github.com/hashicorp/vault/sdk/logical"
+)
+
+func pkiFetchIssuers(e *Executor, versionError func()) (bool, *PathFetch, []string, error) {
+	issuersRet, err := e.FetchIfNotFetched(logical.ListOperation, "/{{mount}}/issuers")
+	if err != nil {
+		return true, nil, nil, err
+	}
+
+	if !issuersRet.IsSecretOK() {
+		if issuersRet.IsUnsupportedPathError() {
+			versionError()
+		}
+
+		return true, nil, nil, nil
+	}
+
+	if len(issuersRet.ParsedCache) == 0 {
+		var issuers []string
+		for _, rawIssuerId := range issuersRet.Secret.Data["keys"].([]interface{}) {
+			issuers = append(issuers, rawIssuerId.(string))
+		}
+		issuersRet.ParsedCache["issuers"] = issuers
+	}
+
+	return false, issuersRet, issuersRet.ParsedCache["issuers"].([]string), nil
+}
+
+// parsePEM decodes the first PEM block found in contents and returns its
+// raw bytes; anything after that block is ignored. It fails when no PEM
+// block can be decoded.
+func parsePEM(contents string) ([]byte, error) {
+	if block, _ := pem.Decode([]byte(contents)); block != nil {
+		return block.Bytes, nil
+	}
+
+	return nil, fmt.Errorf("invalid PEM block")
+}
+
+// parsePEMCert decodes contents as PEM and parses the block's bytes as an
+// X.509 certificate.
+func parsePEMCert(contents string) (*x509.Certificate, error) {
+	der, pemErr := parsePEM(contents)
+	if pemErr != nil {
+		return nil, pemErr
+	}
+
+	certificate, parseErr := x509.ParseCertificate(der)
+	if parseErr == nil {
+		return certificate, nil
+	}
+
+	return nil, fmt.Errorf("invalid certificate: %w", parseErr)
+}
+
+// parsePEMCRL decodes contents as PEM and parses the block's bytes as an
+// X.509 revocation list.
+func parsePEMCRL(contents string) (*x509.RevocationList, error) {
+	der, pemErr := parsePEM(contents)
+	if pemErr != nil {
+		return nil, pemErr
+	}
+
+	list, parseErr := x509.ParseRevocationList(der)
+	if parseErr == nil {
+		return list, nil
+	}
+
+	return nil, fmt.Errorf("invalid CRL: %w", parseErr)
+}
+
+// pkiFetchIssuer reads one issuer through the executor's fetch cache and
+// returns (exit, fetch, certificate, err).
+//
+// exit is true whenever the caller should skip this issuer: on a surfaced
+// fetch error, when the response did not yield a usable secret
+// (versionError is invoked first if the path was unsupported), or when the
+// certificate field is missing, not a string, or unparsable. The parsed
+// certificate is memoized in the fetch's ParsedCache under "certificate".
+func pkiFetchIssuer(e *Executor, issuer string, versionError func()) (bool, *PathFetch, *x509.Certificate, error) {
+	issuerRet, err := e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/issuer/"+issuer+"/json")
+	if err != nil {
+		return true, nil, nil, err
+	}
+
+	if !issuerRet.IsSecretOK() {
+		if issuerRet.IsUnsupportedPathError() {
+			versionError()
+		}
+		return true, nil, nil, nil
+	}
+
+	if cached, ok := issuerRet.ParsedCache["certificate"].(*x509.Certificate); ok {
+		return false, issuerRet, cached, nil
+	}
+
+	// Validate the field's type instead of panicking on a failed assertion.
+	rawCert, ok := issuerRet.Secret.Data["certificate"].(string)
+	if !ok {
+		return true, nil, nil, fmt.Errorf("unable to parse issuer %v's certificate: certificate field is missing or not a string (got %T)", issuer, issuerRet.Secret.Data["certificate"])
+	}
+
+	cert, err := parsePEMCert(rawCert)
+	if err != nil {
+		return true, nil, nil, fmt.Errorf("unable to parse issuer %v's certificate: %w", issuer, err)
+	}
+	issuerRet.ParsedCache["certificate"] = cert
+
+	return false, issuerRet, cert, nil
+}
+
+// pkiFetchIssuerCRL reads an issuer's CRL (or its delta CRL when delta is
+// true) through the executor's fetch cache and returns
+// (exit, fetch, crl, err).
+//
+// exit is true whenever the caller should skip this CRL: on a surfaced
+// fetch error, when the response did not yield a usable secret
+// (versionError is invoked first if the path was unsupported), or when the
+// crl field is missing, not a string, or unparsable. The parsed list is
+// memoized in the fetch's ParsedCache under "crl".
+func pkiFetchIssuerCRL(e *Executor, issuer string, delta bool, versionError func()) (bool, *PathFetch, *x509.RevocationList, error) {
+	path := "/{{mount}}/issuer/" + issuer + "/crl"
+	name := "CRL"
+	if delta {
+		path += "/delta"
+		name = "Delta CRL"
+	}
+
+	crlRet, err := e.FetchIfNotFetched(logical.ReadOperation, path)
+	if err != nil {
+		return true, nil, nil, err
+	}
+
+	if !crlRet.IsSecretOK() {
+		if crlRet.IsUnsupportedPathError() {
+			versionError()
+		}
+		return true, nil, nil, nil
+	}
+
+	if cached, ok := crlRet.ParsedCache["crl"].(*x509.RevocationList); ok {
+		return false, crlRet, cached, nil
+	}
+
+	// Validate the field's type instead of panicking on a failed assertion.
+	rawCRL, ok := crlRet.Secret.Data["crl"].(string)
+	if !ok {
+		return true, nil, nil, fmt.Errorf("unable to parse issuer %v's %v: crl field is missing or not a string (got %T)", issuer, name, crlRet.Secret.Data["crl"])
+	}
+
+	crl, err := parsePEMCRL(rawCRL)
+	if err != nil {
+		return true, nil, nil, fmt.Errorf("unable to parse issuer %v's %v: %w", issuer, name, err)
+	}
+	crlRet.ParsedCache["crl"] = crl
+
+	return false, crlRet, crl, nil
+}
--- a/command/healthcheck/pki_ca_validity_period.go
+++ b/command/healthcheck/pki_ca_validity_period.go
@ -0,0 +1,166 @@
+package healthcheck
+
+import (
+	"bytes"
+	"crypto/x509"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/hashicorp/go-secure-stdlib/parseutil"
+)
+
+// CAValidityPeriod is the "ca_validity_period" check: it flags issuers
+// whose certificates expire within configured windows.
+//
+// RootExpiries and IntermediateExpieries hold, per result status, how far
+// ahead of expiry that status is raised for root and non-root issuers
+// respectively. UnsupportedVersion is set when a fetch reported an
+// unsupported path. Issuers holds the parsed certificates keyed by the
+// issuer identifier returned from the issuer list.
+type CAValidityPeriod struct {
+	Enabled bool
+
+	RootExpiries          map[ResultStatus]time.Duration
+	IntermediateExpieries map[ResultStatus]time.Duration
+
+	UnsupportedVersion bool
+
+	Issuers map[string]*x509.Certificate
+}
+
+// NewCAValidityPeriodCheck returns a CAValidityPeriod check with its maps
+// allocated; it stays disabled until LoadConfig is called.
+func NewCAValidityPeriodCheck() Check {
+	check := &CAValidityPeriod{}
+	check.RootExpiries = make(map[ResultStatus]time.Duration, 3)
+	check.IntermediateExpieries = make(map[ResultStatus]time.Duration, 3)
+	check.Issuers = make(map[string]*x509.Certificate)
+	return check
+}
+
+func (h *CAValidityPeriod) Name() string {
+	return "ca_validity_period"
+}
+
+// IsEnabled reports the enabled flag set by the last LoadConfig call.
+func (h *CAValidityPeriod) IsEnabled() bool {
+	enabled := h.Enabled
+	return enabled
+}
+
+// DefaultConfig returns the default expiry windows, as duration strings,
+// for root and intermediate issuers at each severity.
+func (h *CAValidityPeriod) DefaultConfig() map[string]interface{} {
+	defaults := make(map[string]interface{}, 6)
+
+	defaults["root_expiry_critical"] = "180d"
+	defaults["root_expiry_warning"] = "365d"
+	defaults["root_expiry_informational"] = "730d"
+
+	defaults["intermediate_expiry_critical"] = "30d"
+	defaults["intermediate_expiry_warning"] = "60d"
+	defaults["intermediate_expiry_informational"] = "180d"
+
+	return defaults
+}
+
+// LoadConfig reads the six "<root|intermediate>_expiry_<status>" durations
+// and the "enabled" flag from config and stores them on the check.
+//
+// Every expiry parameter must be present; an error is returned for a
+// missing or unparsable value, or for an "enabled" value that cannot be
+// parsed as a boolean.
+func (h *CAValidityPeriod) LoadConfig(config map[string]interface{}) error {
+	parameters := []string{
+		"root_expiry_critical",
+		"intermediate_expiry_critical",
+		"root_expiry_warning",
+		"intermediate_expiry_warning",
+		"root_expiry_informational",
+		"intermediate_expiry_informational",
+	}
+	for _, parameter := range parameters {
+		// Parameter names have the shape <ca type>_expiry_<status>.
+		nameSplit := strings.Split(parameter, "_")
+		if len(nameSplit) != 3 || nameSplit[1] != "expiry" {
+			// Report the whole split: indexing it here could be out of
+			// range when the length check is what failed.
+			return fmt.Errorf("bad parameter: %v / %v / %v", parameter, len(nameSplit), nameSplit)
+		}
+
+		status, present := NameResultStatusMap[nameSplit[2]]
+		if !present {
+			return fmt.Errorf("bad parameter: %v's type %v isn't in name map", parameter, nameSplit[2])
+		}
+
+		valueRaw, present := config[parameter]
+		if !present {
+			return fmt.Errorf("parameter not present in config; Executor should've handled this for us: %v", parameter)
+		}
+
+		value, err := parseutil.ParseDurationSecond(valueRaw)
+		if err != nil {
+			return fmt.Errorf("failed to parse parameter (%v=%v): %w", parameter, valueRaw, err)
+		}
+
+		switch nameSplit[0] {
+		case "root":
+			h.RootExpiries[status] = value
+		case "intermediate":
+			h.IntermediateExpieries[status] = value
+		default:
+			return fmt.Errorf("bad parameter: %v's CA type isn't root/intermediate: %v", parameter, nameSplit[0])
+		}
+	}
+
+	enabled, err := parseutil.ParseBool(config["enabled"])
+	if err != nil {
+		return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), err)
+	}
+	h.Enabled = enabled
+
+	return nil
+}
+
+// FetchResources lists the mount's issuers and loads each issuer's
+// certificate into h.Issuers. Issuers that cannot be used are skipped; a
+// surfaced fetch or parse error aborts. An unsupported path marks the
+// check as running against an unsupported version.
+func (h *CAValidityPeriod) FetchResources(e *Executor) error {
+	markUnsupported := func() {
+		h.UnsupportedVersion = true
+	}
+
+	stop, _, issuerIDs, listErr := pkiFetchIssuers(e, markUnsupported)
+	if stop {
+		return listErr
+	}
+
+	for _, id := range issuerIDs {
+		skip, _, certificate, fetchErr := pkiFetchIssuer(e, id, markUnsupported)
+		if skip {
+			if fetchErr != nil {
+				return fetchErr
+			}
+			continue
+		}
+
+		h.Issuers[id] = certificate
+	}
+
+	return nil
+}
+
+// Evaluate produces one result per fetched issuer. An issuer is treated as
+// a root when its subject equals its issuer and it verifies its own
+// signature; otherwise the intermediate windows apply. The most severe
+// window the certificate's NotAfter falls inside determines the status.
+// When the version was flagged as unsupported, a single invalid-version
+// result is returned instead.
+func (h *CAValidityPeriod) Evaluate(e *Executor) (results []*Result, err error) {
+	if h.UnsupportedVersion {
+		return []*Result{{
+			Status:   ResultInvalidVersion,
+			Endpoint: "/{{mount}}/issuers",
+			Message:  "This health check requires Vault 1.11+ but an earlier version of Vault Server was contacted, preventing this health check from running.",
+		}}, nil
+	}
+
+	const dateLayout = "2006-01-02"
+	// Ordered from most to least severe so the first match wins.
+	severities := [...]ResultStatus{ResultCritical, ResultWarning, ResultInformational}
+	now := time.Now()
+
+	for issuerID, cert := range h.Issuers {
+		subjectMatchesIssuer := bytes.Equal(cert.RawSubject, cert.RawIssuer)
+		signedBySelf := cert.CheckSignatureFrom(cert) == nil
+
+		windows := h.IntermediateExpieries
+		if subjectMatchesIssuer && signedBySelf {
+			windows = h.RootExpiries
+		}
+
+		res := &Result{
+			Status:   ResultOK,
+			Endpoint: "/{{mount}}/issuer/" + issuerID,
+			Message:  fmt.Sprintf("Issuer's validity (%v) is OK", cert.NotAfter.Format(dateLayout)),
+		}
+
+		for _, severity := range severities {
+			window := windows[severity]
+			cutoff := now.Add(window)
+			if !cert.NotAfter.Before(cutoff) {
+				continue
+			}
+
+			res.Status = severity
+			res.Message = fmt.Sprintf("Issuer's validity is outside of the suggested rotation window: issuer is valid until %v but expires within %v (ending on %v). It is suggested to start rotating this issuer to new key material to avoid future downtime caused by this current issuer expiring.", cert.NotAfter.Format(dateLayout), FormatDuration(window), cutoff.Format(dateLayout))
+			break
+		}
+
+		results = append(results, res)
+	}
+
+	return results, nil
+}
--- a/command/healthcheck/pki_crl_validity_period.go
+++ b/command/healthcheck/pki_crl_validity_period.go
@ -0,0 +1,205 @@
+package healthcheck
+
+import (
+	"crypto/x509"
+	"fmt"
+	"time"
+
+	"github.com/hashicorp/vault/sdk/logical"
+
+	"github.com/hashicorp/go-secure-stdlib/parseutil"
+)
+
+// CRLValidityPeriod is the "crl_validity_period" check: it flags CRLs and
+// delta CRLs that have used up too much of their validity window.
+//
+// CRLExpiryPercentage and DeltaCRLExpiryPercentage are the percentages of
+// the window (ThisUpdate to NextUpdate) after which a result becomes
+// critical. UnsupportedVersion and NoDeltas are set when the issuer/CRL or
+// delta CRL paths, respectively, were reported as unsupported. CRLs and
+// DeltaCRLs hold the parsed lists keyed by issuer identifier. CRLConfig is
+// the fetch of the mount's CRL configuration.
+type CRLValidityPeriod struct {
+	Enabled bool
+
+	CRLExpiryPercentage      int
+	DeltaCRLExpiryPercentage int
+
+	UnsupportedVersion bool
+	NoDeltas           bool
+
+	CRLs      map[string]*x509.RevocationList
+	DeltaCRLs map[string]*x509.RevocationList
+
+	CRLConfig *PathFetch
+}
+
+// NewCRLValidityPeriodCheck returns a CRLValidityPeriod check with its CRL
+// maps allocated; it stays disabled until LoadConfig is called.
+func NewCRLValidityPeriodCheck() Check {
+	check := &CRLValidityPeriod{}
+	check.CRLs = make(map[string]*x509.RevocationList)
+	check.DeltaCRLs = make(map[string]*x509.RevocationList)
+	return check
+}
+
+func (h *CRLValidityPeriod) Name() string {
+	return "crl_validity_period"
+}
+
+// IsEnabled reports the enabled flag set by the last LoadConfig call.
+func (h *CRLValidityPeriod) IsEnabled() bool {
+	enabled := h.Enabled
+	return enabled
+}
+
+// DefaultConfig returns the default critical thresholds, as percentage
+// strings, for CRLs and delta CRLs.
+func (h *CRLValidityPeriod) DefaultConfig() map[string]interface{} {
+	defaults := make(map[string]interface{}, 2)
+	defaults["crl_expiry_pct_critical"] = "95"
+	defaults["delta_crl_expiry_pct_critical"] = "95"
+	return defaults
+}
+
+// LoadConfig reads the CRL and delta CRL critical percentages (each parsed
+// with bounds 1 and 99) and the "enabled" flag from config and stores them
+// on the check, returning an error for the first value that fails to
+// parse.
+func (h *CRLValidityPeriod) LoadConfig(config map[string]interface{}) error {
+	rawCRLPct := config["crl_expiry_pct_critical"]
+	crlPct, err := parseutil.SafeParseIntRange(rawCRLPct, 1, 99)
+	if err != nil {
+		return fmt.Errorf("error parsing %v.crl_expiry_pct_critical=%v: %w", h.Name(), rawCRLPct, err)
+	}
+	h.CRLExpiryPercentage = int(crlPct)
+
+	rawDeltaPct := config["delta_crl_expiry_pct_critical"]
+	deltaPct, err := parseutil.SafeParseIntRange(rawDeltaPct, 1, 99)
+	if err != nil {
+		return fmt.Errorf("error parsing %v.delta_crl_expiry_pct_critical=%v: %w", h.Name(), rawDeltaPct, err)
+	}
+	h.DeltaCRLExpiryPercentage = int(deltaPct)
+
+	isEnabled, err := parseutil.ParseBool(config["enabled"])
+	if err != nil {
+		return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), err)
+	}
+	h.Enabled = isEnabled
+
+	return nil
+}
+
+// FetchResources lists the mount's issuers, loads each issuer's CRL and
+// delta CRL into h.CRLs and h.DeltaCRLs, and finally fetches the mount's
+// CRL configuration. Issuers whose CRL cannot be used are skipped
+// entirely; a surfaced fetch or parse error aborts. Unsupported issuer/CRL
+// paths set UnsupportedVersion; unsupported delta CRL paths set NoDeltas.
+func (h *CRLValidityPeriod) FetchResources(e *Executor) error {
+	markUnsupported := func() {
+		h.UnsupportedVersion = true
+	}
+	markNoDeltas := func() {
+		h.NoDeltas = true
+	}
+
+	stop, _, issuerIDs, listErr := pkiFetchIssuers(e, markUnsupported)
+	if stop {
+		return listErr
+	}
+
+	for _, id := range issuerIDs {
+		skip, _, complete, crlErr := pkiFetchIssuerCRL(e, id, false, markUnsupported)
+		if skip {
+			if crlErr != nil {
+				return crlErr
+			}
+			continue
+		}
+
+		h.CRLs[id] = complete
+
+		skip, _, delta, deltaErr := pkiFetchIssuerCRL(e, id, true, markNoDeltas)
+		if skip {
+			if deltaErr != nil {
+				return deltaErr
+			}
+			continue
+		}
+
+		h.DeltaCRLs[id] = delta
+	}
+
+	// Fetch the mount's CRL configuration (or reuse the cached fetch).
+	crlConfig, configErr := e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/config/crl")
+	if configErr != nil {
+		return configErr
+	}
+
+	h.CRLConfig = crlConfig
+
+	return nil
+}
+
+// Evaluate produces one result per fetched CRL and delta CRL, preceded by
+// advisory results when the CRL configuration could not be read for lack
+// of permissions or when delta CRLs are unsupported. A CRL is critical
+// once the configured percentage of its validity window has elapsed; the
+// severity drops to informational when the CRL configuration reports the
+// CRL as disabled. When the version was flagged as unsupported, a single
+// invalid-version result is returned instead.
+func (h *CRLValidityPeriod) Evaluate(e *Executor) (results []*Result, err error) {
+	if h.UnsupportedVersion {
+		ret := Result{
+			Status:   ResultInvalidVersion,
+			Endpoint: "/{{mount}}/issuers",
+			Message:  "This health check requires Vault 1.11+ but an earlier version of Vault Server was contacted, preventing this health check from running.",
+		}
+		return []*Result{&ret}, nil
+	}
+
+	now := time.Now()
+	crlDisabled := false
+	if h.CRLConfig != nil {
+		if h.CRLConfig.IsSecretPermissionsError() {
+			ret := Result{
+				Status:   ResultInsufficientPermissions,
+				Endpoint: "/{{mount}}/config/crl",
+				Message:  "This prevents the health check from seeing if the CRL is disabled and dropping the severity of this check accordingly.",
+			}
+
+			if e.Client.Token() == "" {
+				ret.Message = "No token available so unable read authenticated CRL configuration for this mount. " + ret.Message
+			} else {
+				ret.Message = "This token lacks permission to read the CRL configuration for this mount. " + ret.Message
+			}
+
+			results = append(results, &ret)
+		} else if h.CRLConfig.Secret != nil {
+			// Only honor a well-typed boolean; anything else leaves the
+			// CRL treated as enabled rather than panicking.
+			if disabled, ok := h.CRLConfig.Secret.Data["disabled"].(bool); ok {
+				crlDisabled = disabled
+			}
+		}
+	}
+
+	if h.NoDeltas && len(h.DeltaCRLs) == 0 {
+		ret := Result{
+			Status:   ResultInvalidVersion,
+			Endpoint: "/{{mount}}/issuer/*/crl/delta",
+			Message:  "This health check validates Delta CRLs on Vault 1.12+, but an earlier version of Vault was used. No results about delta CRL validity will be returned.",
+		}
+		results = append(results, &ret)
+	}
+
+	for name, crl := range h.CRLs {
+		endpoint := "/{{mount}}/issuer/" + name + "/crl"
+		results = append(results, crlValidityResult("CRL", endpoint, crl, h.CRLExpiryPercentage, now, crlDisabled))
+	}
+
+	for name, crl := range h.DeltaCRLs {
+		endpoint := "/{{mount}}/issuer/" + name + "/crl/delta"
+		// Delta CRLs are judged, and reported, against their own threshold.
+		results = append(results, crlValidityResult("Delta CRL", endpoint, crl, h.DeltaCRLExpiryPercentage, now, crlDisabled))
+	}
+
+	return results, nil
+}
+
+// crlValidityResult builds the result for one revocation list. kind names
+// the list in messages ("CRL" or "Delta CRL"); pct is the percentage of
+// the ThisUpdate..NextUpdate window after which the list is critical.
+func crlValidityResult(kind string, endpoint string, crl *x509.RevocationList, pct int, now time.Time, crlDisabled bool) *Result {
+	const dateLayout = "2006-01-02"
+
+	thisUpdate := crl.ThisUpdate.Format(dateLayout)
+	nextUpdate := crl.NextUpdate.Format(dateLayout)
+
+	ret := &Result{
+		Status:   ResultOK,
+		Endpoint: endpoint,
+		Message:  fmt.Sprintf("%v's validity (%v to %v) is OK.", kind, thisUpdate, nextUpdate),
+	}
+
+	used := now.Sub(crl.ThisUpdate)
+	total := crl.NextUpdate.Sub(crl.ThisUpdate)
+	ratio := time.Duration((int64(total) * int64(pct)) / int64(100))
+	if used < ratio {
+		return ret
+	}
+
+	expWhen := crl.ThisUpdate.Add(ratio)
+	ret.Status = ResultCritical
+	ret.Message = fmt.Sprintf("%[1]v's validity is outside of suggested rotation window: %[1]v's next update is expected at %[2]v, but expires within %[3]v%% of validity window (starting on %[4]v and ending on %[5]v). It is suggested to rotate this %[1]v and start propagating it to hosts to avoid any issues caused by stale CRLs.", kind, nextUpdate, pct, thisUpdate, expWhen.Format(dateLayout))
+
+	if crlDisabled {
+		ret.Status = ResultInformational
+		ret.Message += " Because the CRL is disabled, this is less of a concern."
+	}
+
+	return ret
+}
--- a/command/healthcheck/util.go
+++ b/command/healthcheck/util.go
@ -0,0 +1,39 @@
+package healthcheck
+
+import (
+	"fmt"
+	"time"
+)
+
+// Approximate calendar units used when rendering long durations: a month
+// is 30 days and a year is 365 days.
+var (
+	oneDay   = 24 * time.Hour
+	oneWeek  = 7 * oneDay
+	oneMonth = 30 * oneDay
+	oneYear  = 365 * oneDay
+)
+
+// suffixDurationMap gives the length of each unit suffix used by
+// FormatDuration.
+var suffixDurationMap = map[string]time.Duration{
+	"y":  oneYear,
+	"mo": oneMonth,
+	"w":  oneWeek,
+	"d":  oneDay,
+}
+
+// orderedSuffixes lists the unit suffixes from largest to smallest.
+var orderedSuffixes = []string{"y", "mo", "w", "d"}
+
+// FormatDuration renders d using years, months, weeks and days, largest
+// unit first, followed by any sub-day remainder in time.Duration's own
+// notation (for example "1y1mo1w1d1h0m0s"). Exact multiples of a unit use
+// that unit, so 365 days is "1y" and 30 days is "1mo". Zero and negative
+// durations are rendered with time.Duration's String method.
+func FormatDuration(d time.Duration) string {
+	if d <= 0 {
+		return d.String()
+	}
+
+	var result string
+	for _, suffix := range orderedSuffixes {
+		unit := suffixDurationMap[suffix]
+		// >= so that an exact multiple is expressed in this unit rather
+		// than being broken down into smaller ones.
+		if d >= unit {
+			quantity := int64(d / unit)
+			result += fmt.Sprintf("%v%v", quantity, suffix)
+			d -= time.Duration(quantity) * unit
+		}
+	}
+
+	if d > 0 {
+		result += d.String()
+	}
+
+	return result
+}
--- a/command/pki_health_check.go
+++ b/command/pki_health_check.go
@ -0,0 +1,332 @@
+package command
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+
+	"github.com/hashicorp/vault/command/healthcheck"
+
+	"github.com/ghodss/yaml"
+	"github.com/mitchellh/cli"
+	"github.com/posener/complete"
+	"github.com/ryanuber/columnize"
+)
+
+// Exit codes returned by the health-check command, numbered 0 through 6 in
+// declaration order. The same numbering is described to users in the Help
+// text.
+const (
+	// pkiRetOK (0): nothing at or above the reporting threshold was found.
+	pkiRetOK int = iota
+	// pkiRetUsage (1): the command could not run as requested.
+	pkiRetUsage
+	// pkiRetInformational (2): a check produced an informational result.
+	pkiRetInformational
+	// pkiRetWarning (3): a check produced a warning result.
+	pkiRetWarning
+	// pkiRetCritical (4): a check produced a critical result.
+	pkiRetCritical
+	// pkiRetInvalidVersion (5): a check hit a version mismatch with the server.
+	pkiRetInvalidVersion
+	// pkiRetInsufficientPermissions (6): a check was denied by the server.
+	pkiRetInsufficientPermissions
+)
+
+var (
+	// Compile-time checks that the command implements the CLI interfaces.
+	_ cli.Command             = (*PKIHealthCheckCommand)(nil)
+	_ cli.CommandAutocomplete = (*PKIHealthCheckCommand)(nil)
+
+	// The return codes above (outside of OK/Usage) are intended to match the
+	// values in the healthcheck package.
+	// NOTE(review): each comparison below is evaluated and its boolean result
+	// discarded, so a mismatch would not fail the build; these lines document
+	// the intended correspondence rather than enforce it.
+	_ = pkiRetInformational == int(healthcheck.ResultInformational)
+	_ = pkiRetWarning == int(healthcheck.ResultWarning)
+	_ = pkiRetCritical == int(healthcheck.ResultCritical)
+	_ = pkiRetInvalidVersion == int(healthcheck.ResultInvalidVersion)
+	_ = pkiRetInsufficientPermissions == int(healthcheck.ResultInsufficientPermissions)
+)
+
+// PKIHealthCheckCommand implements the "pki health-check" CLI command. Its
+// flag fields are populated by the flag set built in Flags.
+type PKIHealthCheckCommand struct {
+	*BaseCommand
+
+	// flagConfig is the -health-config value: path to a JSON configuration file.
+	flagConfig string
+	// flagReturnIndicator is the -return-indicator value, which selects the
+	// severity threshold used when choosing the exit code.
+	flagReturnIndicator string
+	// flagDefaultDisabled is the -default-disabled value; when set, Run turns
+	// off the executor's DefaultEnabled setting.
+	flagDefaultDisabled bool
+	// flagList is the -list value; when set, Run prints the known checks
+	// instead of executing them.
+	flagList bool
+}
+
+// Synopsis returns the one-line description of the command.
+func (c *PKIHealthCheckCommand) Synopsis() string {
+	const synopsis = "Check PKI Secrets Engine health and operational status"
+	return synopsis
+}
+
+// Help returns the long-form usage text for the command: a description,
+// example invocations, the meaning of each exit code, and the help output of
+// the command's flag sets. Leading and trailing whitespace is trimmed.
+//
+// The exit-code list is indented with spaces only so that every entry lines
+// up regardless of the terminal's tab width.
+func (c *PKIHealthCheckCommand) Help() string {
+	helpText := `
+Usage: vault pki health-check [options] MOUNT
+
+  Reports status of the specified mount against best practices and pending
+  failures. This is an informative command and not all recommendations will
+  apply to all mounts; consider using a configuration file to tune the
+  executed health checks.
+
+  To check the pki-root mount with default configuration:
+
+      $ vault pki health-check pki-root
+
+  To specify a configuration:
+
+      $ vault pki health-check -health-config=mycorp-root.json /pki-root
+
+  Return codes indicate failure type:
+
+      0 - Everything is good.
+      1 - Usage error (check CLI parameters).
+      2 - Informational message from a health check.
+      3 - Warning message from a health check.
+      4 - Critical message from a health check.
+      5 - A version mismatch between health check and Vault Server occurred,
+          preventing one or more health checks from being run.
+      6 - A permission denied message was returned from Vault Server for
+          one or more health checks.
+
+` + c.Flags().Help()
+
+	return strings.TrimSpace(helpText)
+}
+
+// Flags builds the flag sets for the command: the shared HTTP and
+// output-format flags plus a "Command Options" set containing
+// -health-config, -return-indicator, -default-disabled and -list. Each flag
+// writes into the corresponding field on c.
+//
+// The -return-indicator usage text is indented with spaces only and carries
+// no trailing whitespace, matching the other multi-line usages here.
+func (c *PKIHealthCheckCommand) Flags() *FlagSets {
+	set := c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
+	f := set.NewFlagSet("Command Options")
+
+	f.StringVar(&StringVar{
+		Name:    "health-config",
+		Target:  &c.flagConfig,
+		Default: "",
+		EnvVar:  "",
+		Usage:   "Path to JSON configuration file to modify health check execution and parameters.",
+	})
+
+	f.StringVar(&StringVar{
+		Name:       "return-indicator",
+		Target:     &c.flagReturnIndicator,
+		Default:    "default",
+		EnvVar:     "",
+		Completion: complete.PredictSet("default", "informational", "warning", "critical", "permission"),
+		Usage: `Behavior of the return value:
+ - permission, for exiting with a non-zero code when the tool lacks
+               permissions or has a version mismatch with the server;
+ - critical, for exiting with a non-zero code when a check returns a
+             critical status in addition to the above;
+ - warning, for exiting with a non-zero status when a check returns a
+            warning status in addition to the above;
+ - informational, for exiting with a non-zero status when a check returns
+                  an informational status in addition to the above;
+ - default, for the default behavior based on severity of message and
+            only returning a zero exit status when all checks have passed
+            and no execution errors have occurred.`,
+	})
+
+	f.BoolVar(&BoolVar{
+		Name:    "default-disabled",
+		Target:  &c.flagDefaultDisabled,
+		Default: false,
+		EnvVar:  "",
+		Usage: `When specified, results in all health checks being disabled by
+default unless enabled by the configuration file explicitly.`,
+	})
+
+	f.BoolVar(&BoolVar{
+		Name:    "list",
+		Target:  &c.flagList,
+		Default: false,
+		EnvVar:  "",
+		Usage: `When specified, no health checks are run, but all known health
+checks are printed. Still requires a positional mount argument.`,
+	})
+
+	return set
+}
+
+// isValidRetIndicator reports whether the -return-indicator value is one of
+// the recognised options; the empty string is accepted as well.
+func (c *PKIHealthCheckCommand) isValidRetIndicator() bool {
+	known := []string{"", "default", "informational", "warning", "critical", "permission"}
+	for _, candidate := range known {
+		if c.flagReturnIndicator == candidate {
+			return true
+		}
+	}
+
+	return false
+}
+
+// AutocompleteArgs returns the predictor used for positional arguments.
+func (c *PKIHealthCheckCommand) AutocompleteArgs() complete.Predictor {
+	// The set of valid mount paths is not known here, so accept anything,
+	// in the same way `vault write` does.
+	var predictor complete.Predictor = complete.PredictAnything
+	return predictor
+}
+
+// AutocompleteFlags returns the completions derived from the command's flag
+// sets.
+func (c *PKIHealthCheckCommand) AutocompleteFlags() complete.Flags {
+	flagSets := c.Flags()
+	return flagSets.Completions()
+}
+
+// Run is the entry point of the command. It parses the flags, expects
+// exactly one positional argument (the mount path), registers the health
+// checks on an executor, optionally lists them, merges an optional JSON
+// configuration file, executes the checks, prints the results, and returns
+// the exit code chosen by selectRetCode. Every failure before results are
+// available returns pkiRetUsage.
+func (c *PKIHealthCheckCommand) Run(args []string) int {
+	// Flags come first; whatever remains is treated as positional.
+	flags := c.Flags()
+	if parseErr := flags.Parse(args); parseErr != nil {
+		c.UI.Error(parseErr.Error())
+		return pkiRetUsage
+	}
+
+	args = flags.Args()
+	switch count := len(args); {
+	case count < 1:
+		c.UI.Error("Not enough arguments (expected mount path, got nothing)")
+		return pkiRetUsage
+	case count > 1:
+		c.UI.Error(fmt.Sprintf("Too many arguments (expected only mount path, got %d arguments)", count))
+		// Hint at the likely cause: an option placed after the mount path.
+		// Only the first such argument is reported.
+		for _, candidate := range args {
+			if !strings.HasPrefix(candidate, "-") {
+				continue
+			}
+			c.UI.Warn(fmt.Sprintf("Options (%v) must be specified before positional arguments (%v)", candidate, args[0]))
+			break
+		}
+		return pkiRetUsage
+	}
+
+	if !c.isValidRetIndicator() {
+		c.UI.Error(fmt.Sprintf("Invalid flag -return-indicator=%v; known options are default, informational, warning, critical, and permission", c.flagReturnIndicator))
+		return pkiRetUsage
+	}
+
+	// Build the API client and the executor that owns the checks.
+	client, err := c.Client()
+	if err != nil {
+		c.UI.Error(err.Error())
+		return pkiRetUsage
+	}
+
+	mountPath := sanitizePath(args[0])
+	executor := healthcheck.NewExecutor(client, mountPath)
+	executor.AddCheck(healthcheck.NewCAValidityPeriodCheck())
+	executor.AddCheck(healthcheck.NewCRLValidityPeriodCheck())
+	if c.flagDefaultDisabled {
+		executor.DefaultEnabled = false
+	}
+
+	// With -list, print the registered checks and stop before running any.
+	if c.flagList {
+		c.UI.Output("Health Checks:")
+		for _, registered := range executor.Checkers {
+			c.UI.Output(" - " + registered.Name())
+		}
+
+		return pkiRetOK
+	}
+
+	// Load the optional configuration file; without one the executor is
+	// handed an empty map.
+	externalConfig := map[string]interface{}{}
+	if configPath := c.flagConfig; configPath != "" {
+		raw, readErr := os.ReadFile(configPath)
+		if readErr != nil {
+			c.UI.Error(fmt.Sprintf("Failed to read configuration file %v: %v", configPath, readErr))
+			return pkiRetUsage
+		}
+
+		if decodeErr := json.Unmarshal(raw, &externalConfig); decodeErr != nil {
+			c.UI.Error(fmt.Sprintf("Failed to parse configuration file %v: %v", configPath, decodeErr))
+			return pkiRetUsage
+		}
+	}
+
+	if buildErr := executor.BuildConfig(externalConfig); buildErr != nil {
+		c.UI.Error(fmt.Sprintf("Failed to build health check configuration: %v", buildErr))
+		return pkiRetUsage
+	}
+
+	// Execute every check.
+	results, err := executor.Execute()
+	if err != nil {
+		c.UI.Error(fmt.Sprintf("Failed to run health check: %v", err))
+		return pkiRetUsage
+	}
+
+	// A rendering failure is reported but does not change the exit code,
+	// which is still derived from the results.
+	if renderErr := c.outputResults(results); renderErr != nil {
+		c.UI.Error(fmt.Sprintf("Failed to render results for display: %v", renderErr))
+	}
+
+	return c.selectRetCode(results)
+}
+
+// outputResults renders results using the output format selected for the UI:
+// "table" (also used when the format is empty), "json" or "yaml". Any other
+// format yields an error.
+func (c *PKIHealthCheckCommand) outputResults(results map[string][]*healthcheck.Result) error {
+	format := Format(c.UI)
+
+	if format == "" || format == "table" {
+		return c.outputResultsTable(results)
+	}
+	if format == "json" {
+		return c.outputResultsJSON(results)
+	}
+	if format == "yaml" {
+		return c.outputResultsYAML(results)
+	}
+
+	return fmt.Errorf("unknown output format: %v", format)
+}
+
+// outputResultsTable prints one section per health check: the check name, an
+// underline of the same length, and a table with the status, endpoint and
+// message of each finding, followed by a blank separator.
+//
+// Sections are printed in lexical order of the check name. Ranging over the
+// results map directly would make the section order differ from one run to
+// the next, because Go does not define map iteration order.
+//
+// It always returns nil.
+func (c *PKIHealthCheckCommand) outputResultsTable(results map[string][]*healthcheck.Result) error {
+	scanners := make([]string, 0, len(results))
+	for scanner := range results {
+		scanners = append(scanners, scanner)
+	}
+	sort.Strings(scanners)
+
+	for _, scanner := range scanners {
+		findings := results[scanner]
+
+		c.UI.Output(scanner)
+		c.UI.Output(strings.Repeat("-", len(scanner)))
+
+		// The first row is the header; every finding adds one row below it.
+		data := make([]string, 0, len(findings)+1)
+		data = append(data, "status"+hopeDelim+"endpoint"+hopeDelim+"message")
+		for _, finding := range findings {
+			row := []string{
+				finding.StatusDisplay,
+				finding.Endpoint,
+				finding.Message,
+			}
+			data = append(data, strings.Join(row, hopeDelim))
+		}
+
+		c.UI.Output(tableOutput(data, &columnize.Config{
+			Delim: hopeDelim,
+		}))
+		c.UI.Output("\n")
+	}
+
+	return nil
+}
+
+// outputResultsJSON prints results as JSON indented by two spaces. A
+// marshalling error is returned and nothing is printed.
+func (c *PKIHealthCheckCommand) outputResultsJSON(results map[string][]*healthcheck.Result) error {
+	encoded, err := json.MarshalIndent(results, "", "  ")
+	if err == nil {
+		c.UI.Output(string(encoded))
+	}
+
+	return err
+}
+
+// outputResultsYAML prints results as YAML. A marshalling error is returned
+// and nothing is printed.
+func (c *PKIHealthCheckCommand) outputResultsYAML(results map[string][]*healthcheck.Result) error {
+	encoded, err := yaml.Marshal(results)
+	if err == nil {
+		c.UI.Output(string(encoded))
+	}
+
+	return err
+}
+
+// selectRetCode picks the process exit code. It finds the highest status
+// among all findings and returns it as an int when it reaches the threshold
+// selected by -return-indicator; otherwise it returns pkiRetOK.
+func (c *PKIHealthCheckCommand) selectRetCode(results map[string][]*healthcheck.Result) int {
+	// Highest status seen across every finding of every check.
+	var worst healthcheck.ResultStatus = healthcheck.ResultNotApplicable
+	for _, findings := range results {
+		for _, finding := range findings {
+			if worst < finding.Status {
+				worst = finding.Status
+			}
+		}
+	}
+
+	// Lowest status that should produce a non-zero exit code.
+	threshold := healthcheck.ResultInformational
+	switch c.flagReturnIndicator {
+	case "warning":
+		threshold = healthcheck.ResultWarning
+	case "critical":
+		threshold = healthcheck.ResultCritical
+	case "permission":
+		threshold = healthcheck.ResultInvalidVersion
+	default:
+		// "", "default", "informational" and anything else keep the
+		// informational threshold.
+	}
+
+	if worst < threshold {
+		return pkiRetOK
+	}
+
+	return int(worst)
+}
--- a/command/pki_health_check_test.go
+++ b/command/pki_health_check_test.go
@ -0,0 +1,98 @@
+package command
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"testing"
+
+	"github.com/hashicorp/vault/api"
+	"github.com/mitchellh/cli"
+
+	"github.com/stretchr/testify/require"
+)
+
+// testPKIHealthCheckCommand returns a health-check command wired to a fresh
+// mock UI, together with that UI.
+func testPKIHealthCheckCommand(tb testing.TB) (*cli.MockUi, *PKIHealthCheckCommand) {
+	tb.Helper()
+
+	mockUI := cli.NewMockUi()
+	cmd := &PKIHealthCheckCommand{
+		BaseCommand: &BaseCommand{UI: mockUI},
+	}
+
+	return mockUI, cmd
+}
+
+// TestPKIHC_Run mounts a PKI engine on a test server, generates a root CA
+// with an 876h TTL, rotates the CRLs, runs `pki health-check -format=json`
+// against the mount, and compares the decoded output with a wireframe of the
+// expected per-check statuses.
+func TestPKIHC_Run(t *testing.T) {
+	client, closer := testVaultServer(t)
+	defer closer()
+
+	mountErr := client.Sys().Mount("pki", &api.MountInput{Type: "pki"})
+	if mountErr != nil {
+		t.Fatalf("pki mount error: %#v", mountErr)
+	}
+
+	rootParams := map[string]interface{}{
+		"key_type":    "ec",
+		"common_name": "Root X1",
+		"ttl":         "876h",
+	}
+	resp, err := client.Logical().Write("pki/root/generate/internal", rootParams)
+	if err != nil || resp == nil {
+		t.Fatalf("failed to prime CA: %v", err)
+	}
+
+	if _, err = client.Logical().Read("pki/crl/rotate"); err != nil {
+		t.Fatalf("failed to rotate CRLs: %v", err)
+	}
+
+	var stdout, stderr bytes.Buffer
+	runOpts := &RunOptions{
+		Stdout: &stdout,
+		Stderr: &stderr,
+		Client: client,
+	}
+
+	code := RunCustom([]string{"pki", "health-check", "-format=json", "pki"}, runOpts)
+	combined := stdout.String() + stderr.String()
+
+	var results map[string][]map[string]interface{}
+	if err := json.Unmarshal([]byte(combined), &results); err != nil {
+		t.Fatalf("failed to decode json (ret %v): %v\njson:\n%v", code, err, combined)
+	}
+
+	t.Log(combined)
+
+	// Wireframe of the expected output: only the listed keys are compared,
+	// and a nil value means "must be present, any value".
+	expected := map[string][]map[string]interface{}{
+		"ca_validity_period": {
+			{"status": "critical"},
+		},
+		"crl_validity_period": {
+			{"status": "ok"},
+			{"status": "ok"},
+		},
+	}
+
+	for test, subtest := range expected {
+		actual, ok := results[test]
+		require.True(t, ok, fmt.Sprintf("expected top-level test %v to be present", test))
+		require.NotNil(t, actual, fmt.Sprintf("expected top-level test %v to be non-empty; wanted wireframe format %v", test, subtest))
+		require.Equal(t, len(subtest), len(actual), fmt.Sprintf("top-level test %v has different number of results %v in wireframe, %v in test output\nwireframe: %v\noutput: %v\n", test, len(subtest), len(actual), subtest, actual))
+
+		for index, wanted := range subtest {
+			for key, want := range wanted {
+				got, present := actual[index][key]
+				require.True(t, present)
+				if want == nil {
+					continue
+				}
+				require.Equal(t, want, got)
+			}
+		}
+	}
+}