Prevent panics in expiration invalidation, and make some changes for testing (#18401)

This commit is contained in:
Nick Cabatoff 2022-12-15 13:09:36 -05:00 committed by GitHub
parent 9d5f021792
commit 429916c135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 56 additions and 30 deletions

3
changelog/18401.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
expiration: Prevent panics on perf standbys when an irrevocable release gets deleted.
```

View File

@ -654,6 +654,7 @@ type Core struct {
rollbackPeriod time.Duration rollbackPeriod time.Duration
pendingRemovalMountsAllowed bool pendingRemovalMountsAllowed bool
expirationRevokeRetryBase time.Duration
} }
func (c *Core) HAState() consts.HAState { func (c *Core) HAState() consts.HAState {
@ -790,6 +791,8 @@ type CoreConfig struct {
RollbackPeriod time.Duration RollbackPeriod time.Duration
PendingRemovalMountsAllowed bool PendingRemovalMountsAllowed bool
ExpirationRevokeRetryBase time.Duration
} }
// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does // GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
@ -944,6 +947,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
effectiveSDKVersion: effectiveSDKVersion, effectiveSDKVersion: effectiveSDKVersion,
userFailedLoginInfo: make(map[FailedLoginUser]*FailedLoginInfo), userFailedLoginInfo: make(map[FailedLoginUser]*FailedLoginInfo),
pendingRemovalMountsAllowed: conf.PendingRemovalMountsAllowed, pendingRemovalMountsAllowed: conf.PendingRemovalMountsAllowed,
expirationRevokeRetryBase: conf.ExpirationRevokeRetryBase,
} }
c.standbyStopCh.Store(make(chan struct{})) c.standbyStopCh.Store(make(chan struct{}))

View File

@ -151,7 +151,8 @@ type ExpirationManager struct {
// request. This value should only be set by tests. // request. This value should only be set by tests.
testRegisterAuthFailure uberAtomic.Bool testRegisterAuthFailure uberAtomic.Bool
jobManager *fairshare.JobManager jobManager *fairshare.JobManager
revokeRetryBase time.Duration
} }
type ExpireLeaseStrategy func(context.Context, *ExpirationManager, string, *namespace.Namespace) type ExpireLeaseStrategy func(context.Context, *ExpirationManager, string, *namespace.Namespace)
@ -234,7 +235,6 @@ func (r *revocationJob) Execute() error {
func (r *revocationJob) OnFailure(err error) { func (r *revocationJob) OnFailure(err error) {
r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)}) r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)})
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err)
r.m.pendingLock.Lock() r.m.pendingLock.Lock()
defer r.m.pendingLock.Unlock() defer r.m.pendingLock.Unlock()
@ -246,12 +246,15 @@ func (r *revocationJob) OnFailure(err error) {
pending := pendingRaw.(pendingInfo) pending := pendingRaw.(pendingInfo)
pending.revokesAttempted++ pending.revokesAttempted++
newTimer := r.revokeExponentialBackoff(pending.revokesAttempted)
if pending.revokesAttempted >= maxRevokeAttempts || errIsUnrecoverable(err) { if pending.revokesAttempted >= maxRevokeAttempts || errIsUnrecoverable(err) {
r.m.logger.Trace("marking lease as irrevocable", "lease_id", r.leaseID, "error", err) reason := "unrecoverable error"
if pending.revokesAttempted >= maxRevokeAttempts { if pending.revokesAttempted >= maxRevokeAttempts {
r.m.logger.Trace("lease has consumed all retry attempts", "lease_id", r.leaseID) reason = "lease has consumed all retry attempts"
err = fmt.Errorf("%v: %w", outOfRetriesMessage, err) err = fmt.Errorf("%v: %w", outOfRetriesMessage, err)
} }
r.m.logger.Trace("failed to revoke lease, marking lease as irrevocable", "lease_id", r.leaseID, "error", err, "reason", reason)
le, loadErr := r.m.loadEntry(r.nsCtx, r.leaseID) le, loadErr := r.m.loadEntry(r.nsCtx, r.leaseID)
if loadErr != nil { if loadErr != nil {
@ -265,9 +268,12 @@ func (r *revocationJob) OnFailure(err error) {
r.m.markLeaseIrrevocable(r.nsCtx, le, err) r.m.markLeaseIrrevocable(r.nsCtx, le, err)
return return
} else {
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err,
"attempts", pending.revokesAttempted, "next_attempt", newTimer)
} }
pending.timer.Reset(revokeExponentialBackoff(pending.revokesAttempted)) pending.timer.Reset(newTimer)
r.m.pending.Store(r.leaseID, pending) r.m.pending.Store(r.leaseID, pending)
} }
@ -285,8 +291,8 @@ func expireLeaseStrategyFairsharing(ctx context.Context, m *ExpirationManager, l
m.jobManager.AddJob(job, mountAccessor) m.jobManager.AddJob(job, mountAccessor)
} }
func revokeExponentialBackoff(attempt uint8) time.Duration { func (r *revocationJob) revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * revokeRetryBase exp := (1 << attempt) * r.m.revokeRetryBase
randomDelta := 0.5 * float64(exp) randomDelta := 0.5 * float64(exp)
// Allow backoff time to be a random value between exp +/- (0.5*exp) // Allow backoff time to be a random value between exp +/- (0.5*exp)
@ -351,7 +357,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "", logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e, expireFunc: e,
jobManager: jobManager, jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
} }
*exp.restoreMode = 1 *exp.restoreMode = 1
@ -495,16 +505,14 @@ func (m *ExpirationManager) invalidate(key string) {
m.nonexpiring.Delete(leaseID) m.nonexpiring.Delete(leaseID)
if info, ok := m.irrevocable.Load(leaseID); ok { if info, ok := m.irrevocable.Load(leaseID); ok {
irrevocable := info.(pendingInfo) ile := info.(*leaseEntry)
m.irrevocable.Delete(leaseID) m.irrevocable.Delete(leaseID)
m.irrevocableLeaseCount-- m.irrevocableLeaseCount--
m.leaseCount-- m.leaseCount--
// Avoid nil pointer dereference. Without cachedLeaseInfo we do not have enough information to // Note that the leaseEntry should never be nil under normal operation.
// accurately update quota lease information. if ile != nil {
// Note that cachedLeaseInfo should never be nil under normal operation. leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: ile.LoginRole}
if irrevocable.cachedLeaseInfo != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: irrevocable.cachedLeaseInfo.LoginRole}
if err := m.core.quotasHandleLeases(ctx, quotas.LeaseActionDeleted, []*quotas.QuotaLeaseInformation{leaseInfo}); err != nil { if err := m.core.quotasHandleLeases(ctx, quotas.LeaseActionDeleted, []*quotas.QuotaLeaseInformation{leaseInfo}); err != nil {
m.logger.Error("failed to update quota on lease invalidation", "error", err) m.logger.Error("failed to update quota on lease invalidation", "error", err)
return return

View File

@ -16,20 +16,32 @@ import (
// PassthroughBackendFactory returns a PassthroughBackend // PassthroughBackendFactory returns a PassthroughBackend
// with leases switched off // with leases switched off
func PassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) { func PassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, false) return LeaseSwitchedPassthroughBackend(ctx, conf, nil)
} }
// LeasedPassthroughBackendFactory returns a PassthroughBackend // LeasedPassthroughBackendFactory returns a PassthroughBackend
// with leases switched on // with leases switched on
func LeasedPassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) { func LeasedPassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, true) return LeaseSwitchedPassthroughBackend(ctx, conf, func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
})
} }
type revokeFunc func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
// LeaseSwitchedPassthroughBackend returns a PassthroughBackend // LeaseSwitchedPassthroughBackend returns a PassthroughBackend
// with leases switched on or off // with leases switched on or off
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, leases bool) (logical.Backend, error) { func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, revoke revokeFunc) (logical.Backend, error) {
var b PassthroughBackend var b PassthroughBackend
b.generateLeases = leases if revoke == nil {
// We probably don't need this, since we should never have to handle revoke requests, but just in case...
b.revoke = func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
}
} else {
b.generateLeases = true
b.revoke = revoke
}
b.Backend = &framework.Backend{ b.Backend = &framework.Backend{
Help: strings.TrimSpace(passthroughHelp), Help: strings.TrimSpace(passthroughHelp),
@ -65,7 +77,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
Type: "kv", Type: "kv",
Renew: b.handleRead, Renew: b.handleRead,
Revoke: b.handleRevoke, Revoke: b.revoke,
}, },
} }
@ -84,6 +96,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
type PassthroughBackend struct { type PassthroughBackend struct {
*framework.Backend *framework.Backend
generateLeases bool generateLeases bool
revoke func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
} }
func (b *PassthroughBackend) handleRevoke(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) { func (b *PassthroughBackend) handleRevoke(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {

View File

@ -197,7 +197,8 @@ func TestPassthroughBackend_List(t *testing.T) {
} }
func TestPassthroughBackend_Revoke(t *testing.T) { func TestPassthroughBackend_Revoke(t *testing.T) {
test := func(b logical.Backend) { test := func(t *testing.T, b logical.Backend) {
t.Helper()
req := logical.TestRequest(t, logical.RevokeOperation, "kv") req := logical.TestRequest(t, logical.RevokeOperation, "kv")
req.Secret = &logical.Secret{ req.Secret = &logical.Secret{
InternalData: map[string]interface{}{ InternalData: map[string]interface{}{
@ -209,10 +210,12 @@ func TestPassthroughBackend_Revoke(t *testing.T) {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
} }
b := testPassthroughBackend() t.Run("passthrough", func(t *testing.T) {
test(b) test(t, testPassthroughBackend())
b = testPassthroughLeasedBackend() })
test(b) t.Run("passthrough-leased", func(t *testing.T) {
test(t, testPassthroughLeasedBackend())
})
} }
func testPassthroughBackend() logical.Backend { func testPassthroughBackend() logical.Backend {

View File

@ -1701,20 +1701,15 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
} }
coreConfig.ClusterCipherSuites = base.ClusterCipherSuites coreConfig.ClusterCipherSuites = base.ClusterCipherSuites
coreConfig.DisableCache = base.DisableCache coreConfig.DisableCache = base.DisableCache
coreConfig.DevToken = base.DevToken coreConfig.DevToken = base.DevToken
coreConfig.RecoveryMode = base.RecoveryMode coreConfig.RecoveryMode = base.RecoveryMode
coreConfig.ActivityLogConfig = base.ActivityLogConfig coreConfig.ActivityLogConfig = base.ActivityLogConfig
coreConfig.EnableResponseHeaderHostname = base.EnableResponseHeaderHostname coreConfig.EnableResponseHeaderHostname = base.EnableResponseHeaderHostname
coreConfig.EnableResponseHeaderRaftNodeID = base.EnableResponseHeaderRaftNodeID coreConfig.EnableResponseHeaderRaftNodeID = base.EnableResponseHeaderRaftNodeID
coreConfig.RollbackPeriod = base.RollbackPeriod coreConfig.RollbackPeriod = base.RollbackPeriod
coreConfig.PendingRemovalMountsAllowed = base.PendingRemovalMountsAllowed coreConfig.PendingRemovalMountsAllowed = base.PendingRemovalMountsAllowed
coreConfig.ExpirationRevokeRetryBase = base.ExpirationRevokeRetryBase
testApplyEntBaseConfig(coreConfig, base) testApplyEntBaseConfig(coreConfig, base)
} }
if coreConfig.ClusterName == "" { if coreConfig.ClusterName == "" {