diff --git a/api/logical.go b/api/logical.go index d2e5bb5e5..1a720cbf2 100644 --- a/api/logical.go +++ b/api/logical.go @@ -66,6 +66,27 @@ func (c *Logical) ReadWithDataWithContext(ctx context.Context, path string, data defer cancelFunc() resp, err := c.readRawWithDataWithContext(ctx, path, data) + return c.ParseRawResponseAndCloseBody(resp, err) +} + +func (c *Logical) ReadRaw(path string) (*Response, error) { + return c.ReadRawWithData(path, nil) +} + +func (c *Logical) ReadRawWithData(path string, data map[string][]string) (*Response, error) { + return c.ReadRawWithDataWithContext(context.Background(), path, data) +} + +func (c *Logical) ReadRawWithDataWithContext(ctx context.Context, path string, data map[string][]string) (*Response, error) { + // See note in client.go, RawRequestWithContext for why we do not call + // Cancel here. The difference between these two methods are that the + // former takes a Request object directly, whereas this builds one + // up for the caller. + ctx, _ = c.c.withConfiguredTimeout(ctx) + return c.readRawWithDataWithContext(ctx, path, data) +} + +func (c *Logical) ParseRawResponseAndCloseBody(resp *Response, err error) (*Secret, error) { if resp != nil { defer resp.Body.Close() } @@ -90,21 +111,6 @@ func (c *Logical) ReadWithDataWithContext(ctx context.Context, path string, data return ParseSecret(resp.Body) } -func (c *Logical) ReadRaw(path string) (*Response, error) { - return c.ReadRawWithData(path, nil) -} - -func (c *Logical) ReadRawWithData(path string, data map[string][]string) (*Response, error) { - return c.ReadRawWithDataWithContext(context.Background(), path, data) -} - -func (c *Logical) ReadRawWithDataWithContext(ctx context.Context, path string, data map[string][]string) (*Response, error) { - ctx, cancelFunc := c.c.withConfiguredTimeout(ctx) - defer cancelFunc() - - return c.readRawWithDataWithContext(ctx, path, data) -} - func (c *Logical) readRawWithDataWithContext(ctx context.Context, path string, data 
map[string][]string) (*Response, error) { r := c.c.NewRequest(http.MethodGet, "/v1/"+path) diff --git a/changelog/17750.txt b/changelog/17750.txt new file mode 100644 index 000000000..af12458f4 --- /dev/null +++ b/changelog/17750.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli/pki: Add health-check subcommand to evaluate the health of a PKI instance. +``` diff --git a/command/commands.go b/command/commands.go index 4e5500b12..40dc937eb 100644 --- a/command/commands.go +++ b/command/commands.go @@ -787,6 +787,11 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) { ShutdownCh: MakeShutdownCh(), }, nil }, + "pki health-check": func() (cli.Command, error) { + return &PKIHealthCheckCommand{ + BaseCommand: getBaseCommand(), + }, nil + }, } // Disabled by default until functional diff --git a/command/healthcheck/common.go b/command/healthcheck/common.go new file mode 100644 index 000000000..af5b47292 --- /dev/null +++ b/command/healthcheck/common.go @@ -0,0 +1,276 @@ +/* + * The healthcheck package attempts to allow generic checks of arbitrary + * engines, while providing a common framework with some performance + * efficiencies in mind. + * + * The core of this package is the Executor context; a caller would + * provision a set of checks, an API client, and a configuration, + * which the executor would use to decide which checks to execute + * and how. + * + * Checks are based around a series of remote paths that are fetched by + * the client; these are broken into two categories: static paths, which + * can always be fetched; and dynamic paths, which the check fetches based + * on earlier results. + * + * For instance, a basic PKI CA lifetime check will have static fetch against + * the list of CAs, and a dynamic fetch, using that earlier list, to fetch the + * PEMs of all CAs. + * + * This allows health checks to share data: many PKI checks will need the + * issuer list and so repeatedly fetching this may result in a performance + * impact. 
+ */ + +package healthcheck + +import ( + "fmt" + "strings" + + "github.com/hashicorp/vault/api" + "github.com/hashicorp/vault/sdk/logical" +) + +type Executor struct { + Client *api.Client + Mount string + DefaultEnabled bool + + Config map[string]map[string]interface{} + + Resources map[string]map[logical.Operation]*PathFetch + + Checkers []Check +} + +func NewExecutor(client *api.Client, mount string) *Executor { + return &Executor{ + Client: client, + DefaultEnabled: true, + Mount: mount, + Config: make(map[string]map[string]interface{}), + Resources: make(map[string]map[logical.Operation]*PathFetch), + } +} + +func (e *Executor) AddCheck(c Check) { + e.Checkers = append(e.Checkers, c) +} + +func (e *Executor) BuildConfig(external map[string]interface{}) error { + merged := e.Config + + for index, checker := range e.Checkers { + name := checker.Name() + if _, present := merged[name]; name == "" || present { + return fmt.Errorf("bad checker %v: name is empty or already present: %v", index, name) + } + + // Fetch the default configuration; if the check returns enabled + // status, verify it matches our expectations (in the event it should + // be disabled by default), otherwise, add it in. + config := checker.DefaultConfig() + enabled, present := config["enabled"] + if !present { + config["enabled"] = e.DefaultEnabled + } else if enabled.(bool) && !e.DefaultEnabled { + config["enabled"] = e.DefaultEnabled + } + + // Now apply any external config for this check. + if econfig, present := external[name]; present { + for param, evalue := range econfig.(map[string]interface{}) { + if _, ok := config[param]; !ok { + // Assumption: default configs have all possible + // configuration options. This external config has + // an unknown option, so we want to error out. + return fmt.Errorf("unknown configuration option for %v: %v", name, param) + } + + config[param] = evalue + } + } + + // Now apply it and save it. 
+ if err := checker.LoadConfig(config); err != nil { + return fmt.Errorf("error saving merged config for %v: %w", name, err) + } + merged[name] = config + } + + return nil +} + +func (e *Executor) Execute() (map[string][]*Result, error) { + ret := make(map[string][]*Result) + for _, checker := range e.Checkers { + if !checker.IsEnabled() { + continue + } + + if err := checker.FetchResources(e); err != nil { + return nil, err + } + + results, err := checker.Evaluate(e) + if err != nil { + return nil, err + } + + for _, result := range results { + result.Endpoint = e.templatePath(result.Endpoint) + result.StatusDisplay = ResultStatusNameMap[result.Status] + } + + ret[checker.Name()] = results + } + + return ret, nil +} + +func (e *Executor) templatePath(path string) string { + return strings.ReplaceAll(path, "{{mount}}", e.Mount) +} + +func (e *Executor) FetchIfNotFetched(op logical.Operation, rawPath string) (*PathFetch, error) { + path := e.templatePath(rawPath) + + byOp, present := e.Resources[path] + if present && byOp != nil { + result, present := byOp[op] + if present && result != nil { + return result, result.FetchSurfaceError() + } + } + + // Must not exist in cache; create it. + if byOp == nil { + e.Resources[path] = make(map[logical.Operation]*PathFetch) + } + + ret := &PathFetch{ + Operation: op, + Path: path, + ParsedCache: make(map[string]interface{}), + } + + data := map[string][]string{} + if op == logical.ListOperation { + data["list"] = []string{"true"} + } else if op != logical.ReadOperation { + return nil, fmt.Errorf("unknown operation: %v on %v", op, path) + } + + response, err := e.Client.Logical().ReadRawWithData(path, data) + ret.Response = response + if err != nil { + ret.FetchError = err + } else { + // Not all secrets will parse correctly. Sometimes we really want + // to fetch a raw endpoint, sometimes we're run with a bad mount + // or missing permissions. 
+ secret, secretErr := e.Client.Logical().ParseRawResponseAndCloseBody(response, err) + if secretErr != nil { + ret.SecretParseError = secretErr + } else { + ret.Secret = secret + } + } + + e.Resources[path][op] = ret + return ret, ret.FetchSurfaceError() +} + +type PathFetch struct { + Operation logical.Operation + Path string + Response *api.Response + FetchError error + Secret *api.Secret + SecretParseError error + ParsedCache map[string]interface{} +} + +func (p *PathFetch) IsOK() bool { + return p.FetchError == nil && p.Response != nil +} + +func (p *PathFetch) IsSecretOK() bool { + return p.IsOK() && p.SecretParseError == nil && p.Secret != nil +} + +func (p *PathFetch) FetchSurfaceError() error { + if p.IsOK() || p.IsSecretPermissionsError() || p.IsUnsupportedPathError() || p.IsMissingResource() { + return nil + } + + if strings.Contains(p.FetchError.Error(), "route entry not found") { + return fmt.Errorf("Error making API request: was a bad mount given?\n\nOperation: %v\nPath: %v\nOriginal Error:\n%w", p.Operation, p.Path, p.FetchError) + } + + return p.FetchError +} + +func (p *PathFetch) IsSecretPermissionsError() bool { + return !p.IsOK() && strings.Contains(p.FetchError.Error(), "permission denied") +} + +func (p *PathFetch) IsUnsupportedPathError() bool { + return !p.IsOK() && strings.Contains(p.FetchError.Error(), "unsupported path") +} + +func (p *PathFetch) IsMissingResource() bool { + return !p.IsOK() && strings.Contains(p.FetchError.Error(), "unable to find") +} + +type Check interface { + Name() string + IsEnabled() bool + + DefaultConfig() map[string]interface{} + LoadConfig(config map[string]interface{}) error + + FetchResources(e *Executor) error + + Evaluate(e *Executor) ([]*Result, error) +} + +type ResultStatus int + +const ( + ResultNotApplicable ResultStatus = iota + ResultOK + ResultInformational + ResultWarning + ResultCritical + ResultInvalidVersion + ResultInsufficientPermissions +) + +var ResultStatusNameMap = 
map[ResultStatus]string{ + ResultNotApplicable: "not_applicable", + ResultOK: "ok", + ResultInformational: "informational", + ResultWarning: "warning", + ResultCritical: "critical", + ResultInvalidVersion: "invalid_version", + ResultInsufficientPermissions: "insufficient_permissions", +} + +var NameResultStatusMap = map[string]ResultStatus{ + "not_applicable": ResultNotApplicable, + "ok": ResultOK, + "informational": ResultInformational, + "warning": ResultWarning, + "critical": ResultCritical, + "invalid_version": ResultInvalidVersion, + "insufficient_permissions": ResultInsufficientPermissions, +} + +type Result struct { + Status ResultStatus `json:"status_code"` + StatusDisplay string `json:"status"` + Endpoint string `json:"endpoint,omitempty"` + Message string `json:"message,omitempty"` +} diff --git a/command/healthcheck/pki.go b/command/healthcheck/pki.go new file mode 100644 index 000000000..e7bfc82a9 --- /dev/null +++ b/command/healthcheck/pki.go @@ -0,0 +1,128 @@ +package healthcheck + +import ( + "crypto/x509" + "encoding/pem" + "fmt" + + "github.com/hashicorp/vault/sdk/logical" +) + +func pkiFetchIssuers(e *Executor, versionError func()) (bool, *PathFetch, []string, error) { + issuersRet, err := e.FetchIfNotFetched(logical.ListOperation, "/{{mount}}/issuers") + if err != nil { + return true, nil, nil, err + } + + if !issuersRet.IsSecretOK() { + if issuersRet.IsUnsupportedPathError() { + versionError() + } + + return true, nil, nil, nil + } + + if len(issuersRet.ParsedCache) == 0 { + var issuers []string + for _, rawIssuerId := range issuersRet.Secret.Data["keys"].([]interface{}) { + issuers = append(issuers, rawIssuerId.(string)) + } + issuersRet.ParsedCache["issuers"] = issuers + } + + return false, issuersRet, issuersRet.ParsedCache["issuers"].([]string), nil +} + +func parsePEM(contents string) ([]byte, error) { + // Need to parse out the issuer from its PEM format. 
+ pemBlock, _ := pem.Decode([]byte(contents)) + if pemBlock == nil { + return nil, fmt.Errorf("invalid PEM block") + } + + return pemBlock.Bytes, nil +} + +func parsePEMCert(contents string) (*x509.Certificate, error) { + parsed, err := parsePEM(contents) + if err != nil { + return nil, err + } + + cert, err := x509.ParseCertificate(parsed) + if err != nil { + return nil, fmt.Errorf("invalid certificate: %w", err) + } + + return cert, nil +} + +func parsePEMCRL(contents string) (*x509.RevocationList, error) { + parsed, err := parsePEM(contents) + if err != nil { + return nil, err + } + + crl, err := x509.ParseRevocationList(parsed) + if err != nil { + return nil, fmt.Errorf("invalid CRL: %w", err) + } + + return crl, nil +} + +func pkiFetchIssuer(e *Executor, issuer string, versionError func()) (bool, *PathFetch, *x509.Certificate, error) { + issuerRet, err := e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/issuer/"+issuer+"/json") + if err != nil { + return true, nil, nil, err + } + + if !issuerRet.IsSecretOK() { + if issuerRet.IsUnsupportedPathError() { + versionError() + } + return true, nil, nil, nil + } + + if len(issuerRet.ParsedCache) == 0 { + cert, err := parsePEMCert(issuerRet.Secret.Data["certificate"].(string)) + if err != nil { + return true, nil, nil, fmt.Errorf("unable to parse issuer %v's certificate: %w", issuer, err) + } + + issuerRet.ParsedCache["certificate"] = cert + } + + return false, issuerRet, issuerRet.ParsedCache["certificate"].(*x509.Certificate), nil +} + +func pkiFetchIssuerCRL(e *Executor, issuer string, delta bool, versionError func()) (bool, *PathFetch, *x509.RevocationList, error) { + path := "/{{mount}}/issuer/" + issuer + "/crl" + name := "CRL" + if delta { + path += "/delta" + name = "Delta CRL" + } + + crlRet, err := e.FetchIfNotFetched(logical.ReadOperation, path) + if err != nil { + return true, nil, nil, err + } + + if !crlRet.IsSecretOK() { + if crlRet.IsUnsupportedPathError() { + versionError() + } + return true, 
nil, nil, nil + } + + if len(crlRet.ParsedCache) == 0 { + crl, err := parsePEMCRL(crlRet.Secret.Data["crl"].(string)) + if err != nil { + return true, nil, nil, fmt.Errorf("unable to parse issuer %v's %v: %w", issuer, name, err) + } + crlRet.ParsedCache["crl"] = crl + } + + return false, crlRet, crlRet.ParsedCache["crl"].(*x509.RevocationList), nil +} diff --git a/command/healthcheck/pki_ca_validity_period.go b/command/healthcheck/pki_ca_validity_period.go new file mode 100644 index 000000000..63c7e1a75 --- /dev/null +++ b/command/healthcheck/pki_ca_validity_period.go @@ -0,0 +1,166 @@ +package healthcheck + +import ( + "bytes" + "crypto/x509" + "fmt" + "strings" + "time" + + "github.com/hashicorp/go-secure-stdlib/parseutil" +) + +type CAValidityPeriod struct { + Enabled bool + + RootExpiries map[ResultStatus]time.Duration + IntermediateExpieries map[ResultStatus]time.Duration + + UnsupportedVersion bool + + Issuers map[string]*x509.Certificate +} + +func NewCAValidityPeriodCheck() Check { + return &CAValidityPeriod{ + RootExpiries: make(map[ResultStatus]time.Duration, 3), + IntermediateExpieries: make(map[ResultStatus]time.Duration, 3), + Issuers: make(map[string]*x509.Certificate), + } +} + +func (h *CAValidityPeriod) Name() string { + return "ca_validity_period" +} + +func (h *CAValidityPeriod) IsEnabled() bool { + return h.Enabled +} + +func (h *CAValidityPeriod) DefaultConfig() map[string]interface{} { + return map[string]interface{}{ + "root_expiry_critical": "180d", + "intermediate_expiry_critical": "30d", + "root_expiry_warning": "365d", + "intermediate_expiry_warning": "60d", + "root_expiry_informational": "730d", + "intermediate_expiry_informational": "180d", + } +} + +func (h *CAValidityPeriod) LoadConfig(config map[string]interface{}) error { + parameters := []string{ + "root_expiry_critical", + "intermediate_expiry_critical", + "root_expiry_warning", + "intermediate_expiry_warning", + "root_expiry_informational", + "intermediate_expiry_informational", 
+ } + for _, parameter := range parameters { + name_split := strings.Split(parameter, "_") + if len(name_split) != 3 || name_split[1] != "expiry" { + return fmt.Errorf("bad parameter: %v / %v / %v", parameter, len(name_split), name_split[1]) + } + + status, present := NameResultStatusMap[name_split[2]] + if !present { + return fmt.Errorf("bad parameter: %v's type %v isn't in name map", parameter, name_split[2]) + } + + value_raw, present := config[parameter] + if !present { + return fmt.Errorf("parameter not present in config; Executor should've handled this for us: %v", parameter) + } + + value, err := parseutil.ParseDurationSecond(value_raw) + if err != nil { + return fmt.Errorf("failed to parse parameter (%v=%v): %w", parameter, value_raw, err) + } + + if name_split[0] == "root" { + h.RootExpiries[status] = value + } else if name_split[0] == "intermediate" { + h.IntermediateExpieries[status] = value + } else { + return fmt.Errorf("bad parameter: %v's CA type isn't root/intermediate: %v", parameter, name_split[0]) + } + } + + enabled, err := parseutil.ParseBool(config["enabled"]) + if err != nil { + return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), err) + } + h.Enabled = enabled + + return nil +} + +func (h *CAValidityPeriod) FetchResources(e *Executor) error { + exit, _, issuers, err := pkiFetchIssuers(e, func() { + h.UnsupportedVersion = true + }) + if exit { + return err + } + + for _, issuer := range issuers { + skip, _, cert, err := pkiFetchIssuer(e, issuer, func() { + h.UnsupportedVersion = true + }) + if skip { + if err != nil { + return err + } + continue + } + + h.Issuers[issuer] = cert + } + + return nil +} + +func (h *CAValidityPeriod) Evaluate(e *Executor) (results []*Result, err error) { + if h.UnsupportedVersion { + ret := Result{ + Status: ResultInvalidVersion, + Endpoint: "/{{mount}}/issuers", + Message: "This health check requires Vault 1.11+ but an earlier version of Vault Server was contacted, preventing this health check from 
running.", + } + return []*Result{&ret}, nil + } + + now := time.Now() + + for name, cert := range h.Issuers { + var ret Result + ret.Status = ResultOK + ret.Endpoint = "/{{mount}}/issuer/" + name + ret.Message = fmt.Sprintf("Issuer's validity (%v) is OK", cert.NotAfter.Format("2006-01-02")) + + hasSelfReference := bytes.Equal(cert.RawSubject, cert.RawIssuer) + isSelfSigned := cert.CheckSignatureFrom(cert) == nil + isRoot := hasSelfReference && isSelfSigned + + for _, criticality := range []ResultStatus{ResultCritical, ResultWarning, ResultInformational} { + var d time.Duration + if isRoot { + d = h.RootExpiries[criticality] + } else { + d = h.IntermediateExpieries[criticality] + } + + windowExpiry := now.Add(d) + if cert.NotAfter.Before(windowExpiry) { + ret.Status = criticality + ret.Message = fmt.Sprintf("Issuer's validity is outside of the suggested rotation window: issuer is valid until %v but expires within %v (ending on %v). It is suggested to start rotating this issuer to new key material to avoid future downtime caused by this current issuer expiring.", cert.NotAfter.Format("2006-01-02"), FormatDuration(d), windowExpiry.Format("2006-01-02")) + break + } + } + + results = append(results, &ret) + } + + return +} diff --git a/command/healthcheck/pki_crl_validity_period.go b/command/healthcheck/pki_crl_validity_period.go new file mode 100644 index 000000000..4f4b6f91e --- /dev/null +++ b/command/healthcheck/pki_crl_validity_period.go @@ -0,0 +1,205 @@ +package healthcheck + +import ( + "crypto/x509" + "fmt" + "time" + + "github.com/hashicorp/vault/sdk/logical" + + "github.com/hashicorp/go-secure-stdlib/parseutil" +) + +type CRLValidityPeriod struct { + Enabled bool + + CRLExpiryPercentage int + DeltaCRLExpiryPercentage int + + UnsupportedVersion bool + NoDeltas bool + + CRLs map[string]*x509.RevocationList + DeltaCRLs map[string]*x509.RevocationList + + CRLConfig *PathFetch +} + +func NewCRLValidityPeriodCheck() Check { + return &CRLValidityPeriod{ + CRLs: 
make(map[string]*x509.RevocationList), + DeltaCRLs: make(map[string]*x509.RevocationList), + } +} + +func (h *CRLValidityPeriod) Name() string { + return "crl_validity_period" +} + +func (h *CRLValidityPeriod) IsEnabled() bool { + return h.Enabled +} + +func (h *CRLValidityPeriod) DefaultConfig() map[string]interface{} { + return map[string]interface{}{ + "crl_expiry_pct_critical": "95", + "delta_crl_expiry_pct_critical": "95", + } +} + +func (h *CRLValidityPeriod) LoadConfig(config map[string]interface{}) error { + value, err := parseutil.SafeParseIntRange(config["crl_expiry_pct_critical"], 1, 99) + if err != nil { + return fmt.Errorf("error parsing %v.crl_expiry_pct_critical=%v: %w", h.Name(), config["crl_expiry_pct_critical"], err) + } + h.CRLExpiryPercentage = int(value) + + value, err = parseutil.SafeParseIntRange(config["delta_crl_expiry_pct_critical"], 1, 99) + if err != nil { + return fmt.Errorf("error parsing %v.delta_crl_expiry_pct_critical=%v: %w", h.Name(), config["delta_crl_expiry_pct_critical"], err) + } + h.DeltaCRLExpiryPercentage = int(value) + + enabled, err := parseutil.ParseBool(config["enabled"]) + if err != nil { + return fmt.Errorf("error parsing %v.enabled: %w", h.Name(), err) + } + h.Enabled = enabled + + return nil +} + +func (h *CRLValidityPeriod) FetchResources(e *Executor) error { + exit, _, issuers, err := pkiFetchIssuers(e, func() { + h.UnsupportedVersion = true + }) + if exit { + return err + } + + for _, issuer := range issuers { + exit, _, crl, err := pkiFetchIssuerCRL(e, issuer, false, func() { + h.UnsupportedVersion = true + }) + if exit { + if err != nil { + return err + } + continue + } + + h.CRLs[issuer] = crl + + exit, _, delta, err := pkiFetchIssuerCRL(e, issuer, true, func() { + h.NoDeltas = true + }) + if exit { + if err != nil { + return err + } + continue + } + + h.DeltaCRLs[issuer] = delta + } + + // Check if the issuer is fetched yet. 
+ configRet, err := e.FetchIfNotFetched(logical.ReadOperation, "/{{mount}}/config/crl") + if err != nil { + return err + } + + h.CRLConfig = configRet + + return nil +} + +func (h *CRLValidityPeriod) Evaluate(e *Executor) (results []*Result, err error) { + if h.UnsupportedVersion { + ret := Result{ + Status: ResultInvalidVersion, + Endpoint: "/{{mount}}/issuers", + Message: "This health check requires Vault 1.11+ but an earlier version of Vault Server was contacted, preventing this health check from running.", + } + return []*Result{&ret}, nil + } + + now := time.Now() + crlDisabled := false + if h.CRLConfig != nil { + if h.CRLConfig.IsSecretPermissionsError() { + ret := Result{ + Status: ResultInsufficientPermissions, + Endpoint: "/{{mount}}/config/crl", + Message: "This prevents the health check from seeing if the CRL is disabled and dropping the severity of this check accordingly.", + } + + if e.Client.Token() == "" { + ret.Message = "No token available so unable to read authenticated CRL configuration for this mount. " + ret.Message + } else { + ret.Message = "This token lacks permission to read the CRL configuration for this mount. " + ret.Message + } + + results = append(results, &ret) + } else if h.CRLConfig.Secret != nil && h.CRLConfig.Secret.Data["disabled"] != nil { + crlDisabled, _ = h.CRLConfig.Secret.Data["disabled"].(bool) + } + } + + if h.NoDeltas && len(h.DeltaCRLs) == 0 { + ret := Result{ + Status: ResultInvalidVersion, + Endpoint: "/{{mount}}/issuer/*/crl/delta", + Message: "This health check validates Delta CRLs on Vault 1.12+, but an earlier version of Vault was used. 
No results about delta CRL validity will be returned.", + } + results = append(results, &ret) + } + + for name, crl := range h.CRLs { + var ret Result + ret.Status = ResultOK + ret.Endpoint = "/{{mount}}/issuer/" + name + "/crl" + ret.Message = fmt.Sprintf("CRL's validity (%v to %v) is OK.", crl.ThisUpdate.Format("2006-01-02"), crl.NextUpdate.Format("2006-01-02")) + + used := now.Sub(crl.ThisUpdate) + total := crl.NextUpdate.Sub(crl.ThisUpdate) + ratio := time.Duration((int64(total) * int64(h.CRLExpiryPercentage)) / int64(100)) + if used >= ratio { + expWhen := crl.ThisUpdate.Add(ratio) + ret.Status = ResultCritical + ret.Message = fmt.Sprintf("CRL's validity is outside of suggested rotation window: CRL's next update is expected at %v, but expires within %v%% of validity window (starting on %v and ending on %v). It is suggested to rotate this CRL and start propagating it to hosts to avoid any issues caused by stale CRLs.", crl.NextUpdate.Format("2006-01-02"), h.CRLExpiryPercentage, crl.ThisUpdate.Format("2006-01-02"), expWhen.Format("2006-01-02")) + + if crlDisabled == true { + ret.Status = ResultInformational + ret.Message += " Because the CRL is disabled, this is less of a concern." 
+ } + } + + results = append(results, &ret) + } + + for name, crl := range h.DeltaCRLs { + var ret Result + ret.Status = ResultOK + ret.Endpoint = "/{{mount}}/issuer/" + name + "/crl/delta" + ret.Message = fmt.Sprintf("Delta CRL's validity (%v to %v) is OK.", crl.ThisUpdate.Format("2006-01-02"), crl.NextUpdate.Format("2006-01-02")) + + used := now.Sub(crl.ThisUpdate) + total := crl.NextUpdate.Sub(crl.ThisUpdate) + ratio := time.Duration((int64(total) * int64(h.DeltaCRLExpiryPercentage)) / int64(100)) + if used >= ratio { + expWhen := crl.ThisUpdate.Add(ratio) + ret.Status = ResultCritical + ret.Message = fmt.Sprintf("Delta CRL's validity is outside of suggested rotation window: Delta CRL's next update is expected at %v, but expires within %v%% of validity window (starting on %v and ending on %v). It is suggested to rotate this Delta CRL and start propagating it to hosts to avoid any issues caused by stale CRLs.", crl.NextUpdate.Format("2006-01-02"), h.DeltaCRLExpiryPercentage, crl.ThisUpdate.Format("2006-01-02"), expWhen.Format("2006-01-02")) + + if crlDisabled == true { + ret.Status = ResultInformational + ret.Message += " Because the CRL is disabled, this is less of a concern." 
+ } + } + + results = append(results, &ret) + } + + return +} diff --git a/command/healthcheck/util.go b/command/healthcheck/util.go new file mode 100644 index 000000000..632fe1a28 --- /dev/null +++ b/command/healthcheck/util.go @@ -0,0 +1,42 @@ +package healthcheck + +import ( + "fmt" + "time" +) + +var ( + oneDay = 24 * time.Hour + oneWeek = 7 * oneDay + oneMonth = 30 * oneDay + oneYear = 365 * oneDay +) + +var suffixDurationMap = map[string]time.Duration{ + "y": oneYear, + "mo": oneMonth, + "w": oneWeek, + "d": oneDay, +} +var orderedSuffixes = []string{"y", "mo", "w", "d"} + +// FormatDuration renders d using whole years (365d), months (30d), weeks +// and days, largest unit first, followed by any sub-day remainder in +// time.Duration notation; e.g. 36h becomes "1d12h0m0s" and 365d becomes "1y". +func FormatDuration(d time.Duration) string { + var result string + for _, suffix := range orderedSuffixes { + unit := suffixDurationMap[suffix] + if d >= unit { + quantity := int64(d / unit) + result = fmt.Sprintf("%v%v%v", result, quantity, suffix) + d = d - (time.Duration(quantity) * unit) + } + } + + if d > 0 || result == "" { + result += d.String() + } + + return result +} diff --git a/command/pki_health_check.go b/command/pki_health_check.go new file mode 100644 index 000000000..94f6ff881 --- /dev/null +++ b/command/pki_health_check.go @@ -0,0 +1,332 @@ +package command + +import ( + "encoding/json" + "fmt" + "os" + "strings" + + "github.com/hashicorp/vault/command/healthcheck" + + "github.com/ghodss/yaml" + "github.com/mitchellh/cli" + "github.com/posener/complete" + "github.com/ryanuber/columnize" +) + +const ( + pkiRetOK int = iota + pkiRetUsage + pkiRetInformational + pkiRetWarning + pkiRetCritical + pkiRetInvalidVersion + pkiRetInsufficientPermissions +) + +var ( + _ cli.Command = (*PKIHealthCheckCommand)(nil) + _ cli.CommandAutocomplete = (*PKIHealthCheckCommand)(nil) + + // Ensure the above return codes match (outside of OK/Usage) the values in + // the healthcheck package. 
+ _ = pkiRetInformational == int(healthcheck.ResultInformational) + _ = pkiRetWarning == int(healthcheck.ResultWarning) + _ = pkiRetCritical == int(healthcheck.ResultCritical) + _ = pkiRetInvalidVersion == int(healthcheck.ResultInvalidVersion) + _ = pkiRetInsufficientPermissions == int(healthcheck.ResultInsufficientPermissions) +) + +type PKIHealthCheckCommand struct { + *BaseCommand + + flagConfig string + flagReturnIndicator string + flagDefaultDisabled bool + flagList bool +} + +func (c *PKIHealthCheckCommand) Synopsis() string { + return "Check PKI Secrets Engine health and operational status" +} + +func (c *PKIHealthCheckCommand) Help() string { + helpText := ` +Usage: vault pki health-check [options] MOUNT + + Reports status of the specified mount against best practices and pending + failures. This is an informative command and not all recommendations will + apply to all mounts; consider using a configuration file to tune the + executed health checks. + + To check the pki-root mount with default configuration: + + $ vault pki health-check pki-root + + To specify a configuration: + + $ vault pki health-check -health-config=mycorp-root.json /pki-root + + Return codes indicate failure type: + + 0 - Everything is good. + 1 - Usage error (check CLI parameters). + 2 - Informational message from a health check. + 3 - Warning message from a health check. + 4 - Critical message from a health check. + 5 - A version mismatch between health check and Vault Server occurred, + preventing one or more health checks from being run. + 6 - A permission denied message was returned from Vault Server for + one or more health checks. 
+ +` + c.Flags().Help() + + return strings.TrimSpace(helpText) +} + +func (c *PKIHealthCheckCommand) Flags() *FlagSets { + set := c.flagSet(FlagSetHTTP | FlagSetOutputFormat) + f := set.NewFlagSet("Command Options") + + f.StringVar(&StringVar{ + Name: "health-config", + Target: &c.flagConfig, + Default: "", + EnvVar: "", + Usage: "Path to JSON configuration file to modify health check execution and parameters.", + }) + + f.StringVar(&StringVar{ + Name: "return-indicator", + Target: &c.flagReturnIndicator, + Default: "default", + EnvVar: "", + Completion: complete.PredictSet("default", "informational", "warning", "critical", "permission"), + Usage: `Behavior of the return value: + - permission, for exiting with a non-zero code when the tool lacks + permissions or has a version mismatch with the server; + - critical, for exiting with a non-zero code when a check returns a + critical status in addition to the above; + - warning, for exiting with a non-zero status when a check returns a + warning status in addition to the above; + - informational, for exiting with a non-zero status when a check returns + an informational status in addition to the above; + - default, for the default behavior based on severity of message and + only returning a zero exit status when all checks have passed + and no execution errors have occurred. + `, + }) + + f.BoolVar(&BoolVar{ + Name: "default-disabled", + Target: &c.flagDefaultDisabled, + Default: false, + EnvVar: "", + Usage: `When specified, results in all health checks being disabled by +default unless enabled by the configuration file explicitly.`, + }) + + f.BoolVar(&BoolVar{ + Name: "list", + Target: &c.flagList, + Default: false, + EnvVar: "", + Usage: `When specified, no health checks are run, but all known health +checks are printed. 
Still requires a positional mount argument.`, + }) + + return set +} + +func (c *PKIHealthCheckCommand) isValidRetIndicator() bool { + switch c.flagReturnIndicator { + case "", "default", "informational", "warning", "critical", "permission": + return true + default: + return false + } +} + +func (c *PKIHealthCheckCommand) AutocompleteArgs() complete.Predictor { + // Return an anything predictor here, similar to `vault write`. We + // don't know what values are valid for the mount path. + return complete.PredictAnything +} + +func (c *PKIHealthCheckCommand) AutocompleteFlags() complete.Flags { + return c.Flags().Completions() +} + +func (c *PKIHealthCheckCommand) Run(args []string) int { + // Parse and validate the arguments. + f := c.Flags() + + if err := f.Parse(args); err != nil { + c.UI.Error(err.Error()) + return pkiRetUsage + } + + args = f.Args() + if len(args) < 1 { + c.UI.Error("Not enough arguments (expected mount path, got nothing)") + return pkiRetUsage + } else if len(args) > 1 { + c.UI.Error(fmt.Sprintf("Too many arguments (expected only mount path, got %d arguments)", len(args))) + for _, arg := range args { + if strings.HasPrefix(arg, "-") { + c.UI.Warn(fmt.Sprintf("Options (%v) must be specified before positional arguments (%v)", arg, args[0])) + break + } + } + return pkiRetUsage + } + + if !c.isValidRetIndicator() { + c.UI.Error(fmt.Sprintf("Invalid flag -return-indicator=%v; known options are default, informational, warning, critical, and permission", c.flagReturnIndicator)) + return pkiRetUsage + } + + // Setup the client and the executor. + client, err := c.Client() + if err != nil { + c.UI.Error(err.Error()) + return pkiRetUsage + } + + mount := sanitizePath(args[0]) + executor := healthcheck.NewExecutor(client, mount) + executor.AddCheck(healthcheck.NewCAValidityPeriodCheck()) + executor.AddCheck(healthcheck.NewCRLValidityPeriodCheck()) + if c.flagDefaultDisabled { + executor.DefaultEnabled = false + } + + // Handle listing, if necessary. 
+ if c.flagList { + c.UI.Output("Health Checks:") + for _, checker := range executor.Checkers { + c.UI.Output(" - " + checker.Name()) + } + + return pkiRetOK + } + + // Handle config merging. + external_config := map[string]interface{}{} + if c.flagConfig != "" { + contents, err := os.ReadFile(c.flagConfig) + if err != nil { + c.UI.Error(fmt.Sprintf("Failed to read configuration file %v: %v", c.flagConfig, err)) + return pkiRetUsage + } + + if err := json.Unmarshal(contents, &external_config); err != nil { + c.UI.Error(fmt.Sprintf("Failed to parse configuration file %v: %v", c.flagConfig, err)) + return pkiRetUsage + } + } + + if err := executor.BuildConfig(external_config); err != nil { + c.UI.Error(fmt.Sprintf("Failed to build health check configuration: %v", err)) + return pkiRetUsage + } + + // Run the health checks. + results, err := executor.Execute() + if err != nil { + c.UI.Error(fmt.Sprintf("Failed to run health check: %v", err)) + return pkiRetUsage + } + + // Display the output. + if err := c.outputResults(results); err != nil { + c.UI.Error(fmt.Sprintf("Failed to render results for display: %v", err)) + } + + // Select an appropriate return code. 
+ return c.selectRetCode(results) +} + +func (c *PKIHealthCheckCommand) outputResults(results map[string][]*healthcheck.Result) error { + switch Format(c.UI) { + case "", "table": + return c.outputResultsTable(results) + case "json": + return c.outputResultsJSON(results) + case "yaml": + return c.outputResultsYAML(results) + default: + return fmt.Errorf("unknown output format: %v", Format(c.UI)) + } +} + +func (c *PKIHealthCheckCommand) outputResultsTable(results map[string][]*healthcheck.Result) error { + for scanner, findings := range results { + c.UI.Output(scanner) + c.UI.Output(strings.Repeat("-", len(scanner))) + data := []string{"status" + hopeDelim + "endpoint" + hopeDelim + "message"} + for _, finding := range findings { + row := []string{ + finding.StatusDisplay, + finding.Endpoint, + finding.Message, + } + data = append(data, strings.Join(row, hopeDelim)) + } + + c.UI.Output(tableOutput(data, &columnize.Config{ + Delim: hopeDelim, + })) + c.UI.Output("\n") + } + + return nil +} + +func (c *PKIHealthCheckCommand) outputResultsJSON(results map[string][]*healthcheck.Result) error { + bytes, err := json.MarshalIndent(results, "", " ") + if err != nil { + return err + } + + c.UI.Output(string(bytes)) + return nil +} + +func (c *PKIHealthCheckCommand) outputResultsYAML(results map[string][]*healthcheck.Result) error { + bytes, err := yaml.Marshal(results) + if err != nil { + return err + } + + c.UI.Output(string(bytes)) + return nil +} + +func (c *PKIHealthCheckCommand) selectRetCode(results map[string][]*healthcheck.Result) int { + var highestResult healthcheck.ResultStatus = healthcheck.ResultNotApplicable + for _, findings := range results { + for _, finding := range findings { + if finding.Status > highestResult { + highestResult = finding.Status + } + } + } + + cutOff := healthcheck.ResultInformational + switch c.flagReturnIndicator { + case "", "default", "informational": + case "permission": + cutOff = healthcheck.ResultInvalidVersion + case "critical": 
+ cutOff = healthcheck.ResultCritical + case "warning": + cutOff = healthcheck.ResultWarning + } + + if highestResult >= cutOff { + return int(highestResult) + } + + return pkiRetOK +} diff --git a/command/pki_health_check_test.go b/command/pki_health_check_test.go new file mode 100644 index 000000000..7c1937399 --- /dev/null +++ b/command/pki_health_check_test.go @@ -0,0 +1,98 @@ +package command + +import ( + "bytes" + "encoding/json" + "fmt" + "testing" + + "github.com/hashicorp/vault/api" + "github.com/mitchellh/cli" + + "github.com/stretchr/testify/require" +) + +func testPKIHealthCheckCommand(tb testing.TB) (*cli.MockUi, *PKIHealthCheckCommand) { + tb.Helper() + + ui := cli.NewMockUi() + return ui, &PKIHealthCheckCommand{ + BaseCommand: &BaseCommand{ + UI: ui, + }, + } +} + +func TestPKIHC_Run(t *testing.T) { + client, closer := testVaultServer(t) + defer closer() + + if err := client.Sys().Mount("pki", &api.MountInput{ + Type: "pki", + }); err != nil { + t.Fatalf("pki mount error: %#v", err) + } + + if resp, err := client.Logical().Write("pki/root/generate/internal", map[string]interface{}{ + "key_type": "ec", + "common_name": "Root X1", + "ttl": "876h", + }); err != nil || resp == nil { + t.Fatalf("failed to prime CA: %v", err) + } + + if _, err := client.Logical().Read("pki/crl/rotate"); err != nil { + t.Fatalf("failed to rotate CRLs: %v", err) + } + + stdout := bytes.NewBuffer(nil) + stderr := bytes.NewBuffer(nil) + runOpts := &RunOptions{ + Stdout: stdout, + Stderr: stderr, + Client: client, + } + + code := RunCustom([]string{"pki", "health-check", "-format=json", "pki"}, runOpts) + combined := stdout.String() + stderr.String() + + var results map[string][]map[string]interface{} + if err := json.Unmarshal([]byte(combined), &results); err != nil { + t.Fatalf("failed to decode json (ret %v): %v\njson:\n%v", code, err, combined) + } + + t.Log(combined) + + expected := map[string][]map[string]interface{}{ + "ca_validity_period": { + { + "status": "critical", 
+ }, + }, + "crl_validity_period": { + { + "status": "ok", + }, + { + "status": "ok", + }, + }, + } + + for test, subtest := range expected { + actual, ok := results[test] + require.True(t, ok, fmt.Sprintf("expected top-level test %v to be present", test)) + require.NotNil(t, actual, fmt.Sprintf("expected top-level test %v to be non-empty; wanted wireframe format %v", test, subtest)) + require.Equal(t, len(subtest), len(actual), fmt.Sprintf("top-level test %v has different number of results %v in wireframe, %v in test output\nwireframe: %v\noutput: %v\n", test, len(subtest), len(actual), subtest, actual)) + + for index, subset := range subtest { + for key, value := range subset { + a_value, present := actual[index][key] + require.True(t, present) + if value != nil { + require.Equal(t, value, a_value) + } + } + } + } +}