b8576a8de4
* Add path to manually rebuild delta CRLs The crl/rotate-delta path behaves like crl/rotate, triggering a cluster-local rebuild of just the delta CRL. This is useful for when delta CRLs are enabled with a longer-than-desired auto-rebuild period after some high-profile revocations occur. In the event delta CRLs are not enabled, this becomes a no-op. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Add tests for Delta CRL rebuilding Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> * Update documentation about Delta CRLs Also fixes an omission in the If-Modified-Since docs to mention that the response header should probably also be passed through. Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com> Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
530 lines
15 KiB
Go
530 lines
15 KiB
Go
package pki
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/hashicorp/vault/sdk/helper/consts"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/hashicorp/vault/helper/metricsutil"
|
|
"github.com/hashicorp/vault/helper/namespace"
|
|
"github.com/hashicorp/vault/sdk/framework"
|
|
"github.com/hashicorp/vault/sdk/logical"
|
|
)
|
|
|
|
// Role lookup modes understood by metricsWrap.
const (
	// noRole: the wrapped handler does not use a role; no lookup is attempted.
	noRole = iota
	// roleOptional: the "role" field is read if present; an unknown role is
	// only an error when a role name was actually supplied.
	roleOptional
	// roleRequired: the "role" field is always read and the role must exist.
	roleRequired
)
|
|
|
|
/*
|
|
* PKI requests are a bit special to keep up with the various failure and load issues.
|
|
*
|
|
* Any requests to write/delete shared data (such as roles, issuers, keys, and configuration)
|
|
* are always forwarded to the Primary cluster's active node to write and send the key
|
|
* material/config globally across all clusters. Reads should be handled locally, to give a
|
|
* sense of where this cluster's replication state is at.
|
|
*
|
|
* CRL/Revocation and Fetch Certificate APIs are handled by the active node within the cluster
|
|
* they originate. This means, if a request comes into a performance secondary cluster, the writes
|
|
* will be forwarded to that cluster's active node and not go all the way up to the performance primary's
|
|
* active node.
|
|
*
|
|
* If a certificate issue request has a role in which no_store is set to true, that node itself
|
|
* will issue the certificate and not forward the request to the active node, as this does not
|
|
* need to write to storage.
|
|
*
|
|
* Following the same pattern, if a managed key is involved to sign an issued certificate request
|
|
* and the local node does not have access for some reason to it, the request will be forwarded to
|
|
* the active node within the cluster only.
|
|
*
|
|
* To make sense of what goes where, the following bits need to be analyzed within the codebase.
|
|
*
|
|
* 1. The backend LocalStorage paths determine what storage paths will remain within a
|
|
* cluster and not be forwarded to a performance primary
|
|
* 2. Within each path's OperationHandler definition, check to see if ForwardPerformanceStandby &
|
|
* ForwardPerformanceSecondary flags are set to short-circuit the request to a given active node
|
|
* 3. Within the managed key util class in pki, an initialization failure could cause the request
|
|
* to be forwarded to an active node if not already on it.
|
|
*/
|
|
|
|
// Factory creates a new backend implementing the logical.Backend interface
|
|
func Factory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
|
|
b := Backend(conf)
|
|
if err := b.Setup(ctx, conf); err != nil {
|
|
return nil, err
|
|
}
|
|
return b, nil
|
|
}
|
|
|
|
// Backend returns a new Backend framework struct
|
|
func Backend(conf *logical.BackendConfig) *backend {
|
|
var b backend
|
|
b.Backend = &framework.Backend{
|
|
Help: strings.TrimSpace(backendHelp),
|
|
|
|
PathsSpecial: &logical.Paths{
|
|
Unauthenticated: []string{
|
|
"cert/*",
|
|
"ca/pem",
|
|
"ca_chain",
|
|
"ca",
|
|
"crl/delta",
|
|
"crl/delta/pem",
|
|
"crl/pem",
|
|
"crl",
|
|
"issuer/+/crl/der",
|
|
"issuer/+/crl/pem",
|
|
"issuer/+/crl",
|
|
"issuer/+/crl/delta/der",
|
|
"issuer/+/crl/delta/pem",
|
|
"issuer/+/crl/delta",
|
|
"issuer/+/pem",
|
|
"issuer/+/der",
|
|
"issuer/+/json",
|
|
"issuers/", // LIST operations append a '/' to the requested path
|
|
"ocsp", // OCSP POST
|
|
"ocsp/*", // OCSP GET
|
|
},
|
|
|
|
LocalStorage: []string{
|
|
revokedPath,
|
|
deltaWALPath,
|
|
legacyCRLPath,
|
|
"crls/",
|
|
"certs/",
|
|
},
|
|
|
|
Root: []string{
|
|
"root",
|
|
"root/sign-self-issued",
|
|
},
|
|
|
|
SealWrapStorage: []string{
|
|
legacyCertBundlePath,
|
|
keyPrefix,
|
|
},
|
|
},
|
|
|
|
Paths: []*framework.Path{
|
|
pathListRoles(&b),
|
|
pathRoles(&b),
|
|
pathGenerateRoot(&b),
|
|
pathSignIntermediate(&b),
|
|
pathSignSelfIssued(&b),
|
|
pathDeleteRoot(&b),
|
|
pathGenerateIntermediate(&b),
|
|
pathSetSignedIntermediate(&b),
|
|
pathConfigCA(&b),
|
|
pathConfigCRL(&b),
|
|
pathConfigURLs(&b),
|
|
pathSignVerbatim(&b),
|
|
pathSign(&b),
|
|
pathIssue(&b),
|
|
pathRotateCRL(&b),
|
|
pathRotateDeltaCRL(&b),
|
|
pathRevoke(&b),
|
|
pathRevokeWithKey(&b),
|
|
pathTidy(&b),
|
|
pathTidyCancel(&b),
|
|
pathTidyStatus(&b),
|
|
pathConfigAutoTidy(&b),
|
|
|
|
// Issuer APIs
|
|
pathListIssuers(&b),
|
|
pathGetIssuer(&b),
|
|
pathGetIssuerCRL(&b),
|
|
pathImportIssuer(&b),
|
|
pathIssuerIssue(&b),
|
|
pathIssuerSign(&b),
|
|
pathIssuerSignIntermediate(&b),
|
|
pathIssuerSignSelfIssued(&b),
|
|
pathIssuerSignVerbatim(&b),
|
|
pathIssuerGenerateRoot(&b),
|
|
pathRotateRoot(&b),
|
|
pathIssuerGenerateIntermediate(&b),
|
|
pathCrossSignIntermediate(&b),
|
|
pathConfigIssuers(&b),
|
|
pathReplaceRoot(&b),
|
|
pathRevokeIssuer(&b),
|
|
|
|
// Key APIs
|
|
pathListKeys(&b),
|
|
pathKey(&b),
|
|
pathGenerateKey(&b),
|
|
pathImportKey(&b),
|
|
pathConfigKeys(&b),
|
|
|
|
// Fetch APIs have been lowered to favor the newer issuer API endpoints
|
|
pathFetchCA(&b),
|
|
pathFetchCAChain(&b),
|
|
pathFetchCRL(&b),
|
|
pathFetchCRLViaCertPath(&b),
|
|
pathFetchValidRaw(&b),
|
|
pathFetchValid(&b),
|
|
pathFetchListCerts(&b),
|
|
|
|
// OCSP APIs
|
|
buildPathOcspGet(&b),
|
|
buildPathOcspPost(&b),
|
|
},
|
|
|
|
Secrets: []*framework.Secret{
|
|
secretCerts(&b),
|
|
},
|
|
|
|
BackendType: logical.TypeLogical,
|
|
InitializeFunc: b.initialize,
|
|
Invalidate: b.invalidate,
|
|
PeriodicFunc: b.periodicFunc,
|
|
}
|
|
|
|
b.tidyCASGuard = new(uint32)
|
|
b.tidyCancelCAS = new(uint32)
|
|
b.tidyStatus = &tidyStatus{state: tidyStatusInactive}
|
|
b.storage = conf.StorageView
|
|
b.backendUUID = conf.BackendUUID
|
|
|
|
b.pkiStorageVersion.Store(0)
|
|
|
|
b.crlBuilder = newCRLBuilder()
|
|
|
|
// Delay the first tidy until after we've started up.
|
|
b.lastTidy = time.Now()
|
|
|
|
return &b
|
|
}
|
|
|
|
// backend is the PKI secrets engine backend: the embedded framework.Backend
// plus the mount-specific state that Backend() initializes.
type backend struct {
	*framework.Backend

	// backendUUID and storage are copied from the BackendConfig passed to
	// Backend() (conf.BackendUUID and conf.StorageView respectively).
	backendUUID string
	storage     logical.Storage
	// NOTE(review): not referenced in this file; presumably serializes access
	// to revocation storage -- confirm against the revocation paths.
	revokeStorageLock sync.RWMutex
	// tidyCASGuard is swapped 0 -> 1 with atomic.CompareAndSwapUint32 by
	// periodicFunc before it starts an auto-tidy, so a tidy is not started
	// while the guard is already set.
	tidyCASGuard *uint32
	// NOTE(review): allocated in Backend() but not otherwise used in this
	// file; presumably signals tidy cancellation -- confirm in the tidy paths.
	tidyCancelCAS *uint32

	// tidyStatusLock is held by periodicFunc when reading or writing lastTidy.
	tidyStatusLock sync.RWMutex
	tidyStatus     *tidyStatus
	// lastTidy is seeded with time.Now() in Backend() and advanced by
	// periodicFunc each time it kicks off an auto-tidy.
	lastTidy time.Time

	// pkiStorageVersion holds an int: 0 while the legacy CA bundle storage is
	// in use (or migration is still required), 1 otherwise. See
	// updatePkiStorageVersion and useLegacyBundleCaStorage.
	pkiStorageVersion atomic.Value
	crlBuilder        *crlBuilder

	// Write lock around issuers and keys.
	issuersLock sync.RWMutex
}
|
|
|
|
type (
|
|
tidyStatusState int
|
|
roleOperation func(ctx context.Context, req *logical.Request, data *framework.FieldData, role *roleEntry) (*logical.Response, error)
|
|
)
|
|
|
|
const (
|
|
tidyStatusInactive tidyStatusState = iota
|
|
tidyStatusStarted = iota
|
|
tidyStatusFinished = iota
|
|
tidyStatusError = iota
|
|
tidyStatusCancelling = iota
|
|
tidyStatusCancelled = iota
|
|
)
|
|
|
|
// tidyStatus captures both the parameters a tidy operation was started with
// and its progress/result. Backend() creates it with state
// tidyStatusInactive.
//
// NOTE(review): the individual field meanings below are not exercised in this
// file; they are presumably populated by the tidy path handlers -- confirm
// there before relying on them.
type tidyStatus struct {
	// Parameters used to initiate the operation
	safetyBuffer      int
	tidyCertStore     bool
	tidyRevokedCerts  bool
	tidyRevokedAssocs bool
	pauseDuration     string

	// Status
	state                   tidyStatusState // one of the tidyStatus* constants
	err                     error
	timeStarted             time.Time
	timeFinished            time.Time
	message                 string
	certStoreDeletedCount   uint
	revokedCertDeletedCount uint
	missingIssuerCertCount  uint
}
|
|
|
|
// backendHelp is the mount-level help text. Backend() passes it through
// strings.TrimSpace before assigning it to framework.Backend.Help, so the
// leading and trailing newlines here are only for source readability.
const backendHelp = `
The PKI backend dynamically generates X509 server and client certificates.

After mounting this backend, configure the CA using the "pem_bundle" endpoint within
the "config/" path.
`
|
|
|
|
func metricsKey(req *logical.Request, extra ...string) []string {
|
|
if req == nil || req.MountPoint == "" {
|
|
return extra
|
|
}
|
|
key := make([]string, len(extra)+1)
|
|
key[0] = req.MountPoint[:len(req.MountPoint)-1]
|
|
copy(key[1:], extra)
|
|
return key
|
|
}
|
|
|
|
func (b *backend) metricsWrap(callType string, roleMode int, ofunc roleOperation) framework.OperationFunc {
|
|
return func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
|
|
key := metricsKey(req, callType)
|
|
var role *roleEntry
|
|
var labels []metrics.Label
|
|
var err error
|
|
|
|
var roleName string
|
|
switch roleMode {
|
|
case roleRequired:
|
|
roleName = data.Get("role").(string)
|
|
case roleOptional:
|
|
r, ok := data.GetOk("role")
|
|
if ok {
|
|
roleName = r.(string)
|
|
}
|
|
}
|
|
if roleMode > noRole {
|
|
// Get the role
|
|
role, err = b.getRole(ctx, req.Storage, roleName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if role == nil && (roleMode == roleRequired || len(roleName) > 0) {
|
|
return logical.ErrorResponse(fmt.Sprintf("unknown role: %s", roleName)), nil
|
|
}
|
|
labels = []metrics.Label{{"role", roleName}}
|
|
}
|
|
|
|
ns, err := namespace.FromContext(ctx)
|
|
if err == nil {
|
|
labels = append(labels, metricsutil.NamespaceLabel(ns))
|
|
}
|
|
|
|
start := time.Now()
|
|
defer metrics.MeasureSinceWithLabels(key, start, labels)
|
|
resp, err := ofunc(ctx, req, data, role)
|
|
|
|
if err != nil || resp.IsError() {
|
|
metrics.IncrCounterWithLabels(append(key, "failure"), 1.0, labels)
|
|
} else {
|
|
metrics.IncrCounterWithLabels(key, 1.0, labels)
|
|
}
|
|
return resp, err
|
|
}
|
|
}
|
|
|
|
// initialize is used to perform a possible PKI storage migration if needed
|
|
func (b *backend) initialize(ctx context.Context, _ *logical.InitializationRequest) error {
|
|
sc := b.makeStorageContext(ctx, b.storage)
|
|
if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Grab the lock prior to the updating of the storage lock preventing us flipping
|
|
// the storage flag midway through the request stream of other requests.
|
|
b.issuersLock.Lock()
|
|
defer b.issuersLock.Unlock()
|
|
|
|
// Load up our current pki storage state, no matter the host type we are on.
|
|
b.updatePkiStorageVersion(ctx, false)
|
|
|
|
// Early exit if not a primary cluster or performance secondary with a local mount.
|
|
if b.System().ReplicationState().HasState(consts.ReplicationDRSecondary|consts.ReplicationPerformanceStandby) ||
|
|
(!b.System().LocalMount() && b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary)) {
|
|
b.Logger().Debug("skipping PKI migration as we are not on primary or secondary with a local mount")
|
|
return nil
|
|
}
|
|
|
|
if err := migrateStorage(ctx, b, b.storage); err != nil {
|
|
b.Logger().Error("Error during migration of PKI mount: " + err.Error())
|
|
return err
|
|
}
|
|
|
|
b.updatePkiStorageVersion(ctx, false)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (b *backend) useLegacyBundleCaStorage() bool {
|
|
// This helper function is here to choose whether or not we use the newer
|
|
// issuer/key storage format or the older legacy ca bundle format.
|
|
//
|
|
// This happens because we might've upgraded secondary PR clusters to
|
|
// newer vault code versions. We still want to be able to service requests
|
|
// with the old bundle format (e.g., issuing and revoking certs), until
|
|
// the primary cluster's active node is upgraded to the newer Vault version
|
|
// and the storage is migrated to the new format.
|
|
version := b.pkiStorageVersion.Load()
|
|
return version == nil || version == 0
|
|
}
|
|
|
|
func (b *backend) updatePkiStorageVersion(ctx context.Context, grabIssuersLock bool) {
|
|
info, err := getMigrationInfo(ctx, b.storage)
|
|
if err != nil {
|
|
b.Logger().Error(fmt.Sprintf("Failed loading PKI migration status, staying in legacy mode: %v", err))
|
|
return
|
|
}
|
|
|
|
if grabIssuersLock {
|
|
b.issuersLock.Lock()
|
|
defer b.issuersLock.Unlock()
|
|
}
|
|
|
|
if info.isRequired {
|
|
b.pkiStorageVersion.Store(0)
|
|
} else {
|
|
b.pkiStorageVersion.Store(1)
|
|
}
|
|
}
|
|
|
|
func (b *backend) invalidate(ctx context.Context, key string) {
|
|
switch {
|
|
case strings.HasPrefix(key, legacyMigrationBundleLogKey):
|
|
// This is for a secondary cluster to pick up that the migration has completed
|
|
// and reset its compatibility mode and rebuild the CRL locally. Kick it off
|
|
// as a go routine to not block this call due to the lock grabbing
|
|
// within updatePkiStorageVersion.
|
|
go func() {
|
|
b.Logger().Info("Detected a migration completed, resetting pki storage version")
|
|
b.updatePkiStorageVersion(ctx, true)
|
|
b.crlBuilder.requestRebuildIfActiveNode(b)
|
|
}()
|
|
case strings.HasPrefix(key, issuerPrefix):
|
|
if !b.useLegacyBundleCaStorage() {
|
|
// See note in updateDefaultIssuerId about why this is necessary.
|
|
// We do this ahead of CRL rebuilding just so we know that things
|
|
// are stale.
|
|
b.crlBuilder.invalidateCRLBuildTime()
|
|
|
|
// If an issuer has changed on the primary, we need to schedule an update of our CRL,
|
|
// the primary cluster would have done it already, but the CRL is cluster specific so
|
|
// force a rebuild of ours.
|
|
b.crlBuilder.requestRebuildIfActiveNode(b)
|
|
} else {
|
|
b.Logger().Debug("Ignoring invalidation updates for issuer as the PKI migration has yet to complete.")
|
|
}
|
|
case key == "config/crl":
|
|
// We may need to reload our OCSP status flag
|
|
b.crlBuilder.markConfigDirty()
|
|
case key == storageIssuerConfig:
|
|
b.crlBuilder.invalidateCRLBuildTime()
|
|
}
|
|
}
|
|
|
|
func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) error {
|
|
sc := b.makeStorageContext(ctx, request.Storage)
|
|
|
|
doCRL := func() error {
|
|
// First attempt to reload the CRL configuration.
|
|
if err := b.crlBuilder.reloadConfigIfRequired(sc); err != nil {
|
|
return err
|
|
}
|
|
|
|
// As we're (below) modifying the backing storage, we need to ensure
|
|
// we're not on a standby/secondary node.
|
|
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) ||
|
|
b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) {
|
|
return nil
|
|
}
|
|
|
|
// Check if we're set to auto rebuild and a CRL is set to expire.
|
|
if err := b.crlBuilder.checkForAutoRebuild(sc); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Then attempt to rebuild the CRLs if required.
|
|
if err := b.crlBuilder.rebuildIfForced(ctx, b, request); err != nil {
|
|
return err
|
|
}
|
|
|
|
// If a delta CRL was rebuilt above as part of the complete CRL rebuild,
|
|
// this will be a no-op. However, if we do need to rebuild delta CRLs,
|
|
// this would cause us to do so.
|
|
if err := b.crlBuilder.rebuildDeltaCRLsIfForced(sc, false); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
doAutoTidy := func() error {
|
|
// As we're (below) modifying the backing storage, we need to ensure
|
|
// we're not on a standby/secondary node.
|
|
if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) ||
|
|
b.System().ReplicationState().HasState(consts.ReplicationDRSecondary) {
|
|
return nil
|
|
}
|
|
|
|
config, err := sc.getAutoTidyConfig()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !config.Enabled || config.Interval <= 0*time.Second {
|
|
return nil
|
|
}
|
|
|
|
// Check if we should run another tidy...
|
|
now := time.Now()
|
|
b.tidyStatusLock.RLock()
|
|
nextOp := b.lastTidy.Add(config.Interval)
|
|
b.tidyStatusLock.RUnlock()
|
|
if now.Before(nextOp) {
|
|
return nil
|
|
}
|
|
|
|
// Ensure a tidy isn't already running... If it is, we'll trigger
|
|
// again when the running one finishes.
|
|
if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) {
|
|
return nil
|
|
}
|
|
|
|
// Prevent ourselves from starting another tidy operation while
|
|
// this one is still running. This operation runs in the background
|
|
// and has a separate error reporting mechanism.
|
|
b.tidyStatusLock.Lock()
|
|
b.lastTidy = now
|
|
b.tidyStatusLock.Unlock()
|
|
|
|
// Because the request from the parent storage will be cleared at
|
|
// some point (and potentially reused) -- due to tidy executing in
|
|
// a background goroutine -- we need to copy the storage entry off
|
|
// of the backend instead.
|
|
backendReq := &logical.Request{
|
|
Storage: b.storage,
|
|
}
|
|
|
|
b.startTidyOperation(backendReq, config)
|
|
return nil
|
|
}
|
|
|
|
crlErr := doCRL()
|
|
tidyErr := doAutoTidy()
|
|
|
|
if crlErr != nil && tidyErr != nil {
|
|
return fmt.Errorf("Error building CRLs:\n - %v\n\nError running auto-tidy:\n - %v\n", crlErr, tidyErr)
|
|
}
|
|
|
|
if crlErr != nil {
|
|
return fmt.Errorf("Error building CRLs:\n - %v\n", crlErr)
|
|
}
|
|
|
|
if tidyErr != nil {
|
|
return fmt.Errorf("Error running auto-tidy:\n - %v\n", tidyErr)
|
|
}
|
|
|
|
// Check if the CRL was invalidated due to issuer swap and update
|
|
// accordingly.
|
|
if err := b.crlBuilder.flushCRLBuildTimeInvalidation(sc); err != nil {
|
|
return err
|
|
}
|
|
|
|
// All good!
|
|
return nil
|
|
}
|