Prevent panics in expiration invalidation, and make some changes for testing (#18401)

This commit is contained in:
Nick Cabatoff 2022-12-15 13:09:36 -05:00 committed by GitHub
parent 9d5f021792
commit 429916c135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 56 additions and 30 deletions

3
changelog/18401.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
expiration: Prevent panics on perf standbys when an irrevocable release gets deleted.
```

View File

@ -654,6 +654,7 @@ type Core struct {
rollbackPeriod time.Duration
pendingRemovalMountsAllowed bool
expirationRevokeRetryBase time.Duration
}
func (c *Core) HAState() consts.HAState {
@ -790,6 +791,8 @@ type CoreConfig struct {
RollbackPeriod time.Duration
PendingRemovalMountsAllowed bool
ExpirationRevokeRetryBase time.Duration
}
// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
@ -944,6 +947,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
effectiveSDKVersion: effectiveSDKVersion,
userFailedLoginInfo: make(map[FailedLoginUser]*FailedLoginInfo),
pendingRemovalMountsAllowed: conf.PendingRemovalMountsAllowed,
expirationRevokeRetryBase: conf.ExpirationRevokeRetryBase,
}
c.standbyStopCh.Store(make(chan struct{}))

View File

@ -151,7 +151,8 @@ type ExpirationManager struct {
// request. This value should only be set by tests.
testRegisterAuthFailure uberAtomic.Bool
jobManager *fairshare.JobManager
jobManager *fairshare.JobManager
revokeRetryBase time.Duration
}
type ExpireLeaseStrategy func(context.Context, *ExpirationManager, string, *namespace.Namespace)
@ -234,7 +235,6 @@ func (r *revocationJob) Execute() error {
func (r *revocationJob) OnFailure(err error) {
r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)})
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err)
r.m.pendingLock.Lock()
defer r.m.pendingLock.Unlock()
@ -246,12 +246,15 @@ func (r *revocationJob) OnFailure(err error) {
pending := pendingRaw.(pendingInfo)
pending.revokesAttempted++
newTimer := r.revokeExponentialBackoff(pending.revokesAttempted)
if pending.revokesAttempted >= maxRevokeAttempts || errIsUnrecoverable(err) {
r.m.logger.Trace("marking lease as irrevocable", "lease_id", r.leaseID, "error", err)
reason := "unrecoverable error"
if pending.revokesAttempted >= maxRevokeAttempts {
r.m.logger.Trace("lease has consumed all retry attempts", "lease_id", r.leaseID)
reason = "lease has consumed all retry attempts"
err = fmt.Errorf("%v: %w", outOfRetriesMessage, err)
}
r.m.logger.Trace("failed to revoke lease, marking lease as irrevocable", "lease_id", r.leaseID, "error", err, "reason", reason)
le, loadErr := r.m.loadEntry(r.nsCtx, r.leaseID)
if loadErr != nil {
@ -265,9 +268,12 @@ func (r *revocationJob) OnFailure(err error) {
r.m.markLeaseIrrevocable(r.nsCtx, le, err)
return
} else {
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err,
"attempts", pending.revokesAttempted, "next_attempt", newTimer)
}
pending.timer.Reset(revokeExponentialBackoff(pending.revokesAttempted))
pending.timer.Reset(newTimer)
r.m.pending.Store(r.leaseID, pending)
}
@ -285,8 +291,8 @@ func expireLeaseStrategyFairsharing(ctx context.Context, m *ExpirationManager, l
m.jobManager.AddJob(job, mountAccessor)
}
func revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * revokeRetryBase
func (r *revocationJob) revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * r.m.revokeRetryBase
randomDelta := 0.5 * float64(exp)
// Allow backoff time to be a random value between exp +/- (0.5*exp)
@ -351,7 +357,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e,
jobManager: jobManager,
jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
}
*exp.restoreMode = 1
@ -495,16 +505,14 @@ func (m *ExpirationManager) invalidate(key string) {
m.nonexpiring.Delete(leaseID)
if info, ok := m.irrevocable.Load(leaseID); ok {
irrevocable := info.(pendingInfo)
ile := info.(*leaseEntry)
m.irrevocable.Delete(leaseID)
m.irrevocableLeaseCount--
m.leaseCount--
// Avoid nil pointer dereference. Without cachedLeaseInfo we do not have enough information to
// accurately update quota lease information.
// Note that cachedLeaseInfo should never be nil under normal operation.
if irrevocable.cachedLeaseInfo != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: irrevocable.cachedLeaseInfo.LoginRole}
// Note that the leaseEntry should never be nil under normal operation.
if ile != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: ile.LoginRole}
if err := m.core.quotasHandleLeases(ctx, quotas.LeaseActionDeleted, []*quotas.QuotaLeaseInformation{leaseInfo}); err != nil {
m.logger.Error("failed to update quota on lease invalidation", "error", err)
return

View File

@ -16,20 +16,32 @@ import (
// PassthroughBackendFactory returns a PassthroughBackend
// with leases switched off
func PassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, false)
return LeaseSwitchedPassthroughBackend(ctx, conf, nil)
}
// LeasedPassthroughBackendFactory returns a PassthroughBackend
// with leases switched on
func LeasedPassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, true)
return LeaseSwitchedPassthroughBackend(ctx, conf, func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
})
}
type revokeFunc func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
// LeaseSwitchedPassthroughBackend returns a PassthroughBackend
// with leases switched on or off
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, leases bool) (logical.Backend, error) {
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, revoke revokeFunc) (logical.Backend, error) {
var b PassthroughBackend
b.generateLeases = leases
if revoke == nil {
// We probably don't need this, since we should never have to handle revoke requests, but just in case...
b.revoke = func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
}
} else {
b.generateLeases = true
b.revoke = revoke
}
b.Backend = &framework.Backend{
Help: strings.TrimSpace(passthroughHelp),
@ -65,7 +77,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
Type: "kv",
Renew: b.handleRead,
Revoke: b.handleRevoke,
Revoke: b.revoke,
},
}
@ -84,6 +96,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
type PassthroughBackend struct {
*framework.Backend
generateLeases bool
revoke func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
}
func (b *PassthroughBackend) handleRevoke(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {

View File

@ -197,7 +197,8 @@ func TestPassthroughBackend_List(t *testing.T) {
}
func TestPassthroughBackend_Revoke(t *testing.T) {
test := func(b logical.Backend) {
test := func(t *testing.T, b logical.Backend) {
t.Helper()
req := logical.TestRequest(t, logical.RevokeOperation, "kv")
req.Secret = &logical.Secret{
InternalData: map[string]interface{}{
@ -209,10 +210,12 @@ func TestPassthroughBackend_Revoke(t *testing.T) {
t.Fatalf("err: %v", err)
}
}
b := testPassthroughBackend()
test(b)
b = testPassthroughLeasedBackend()
test(b)
t.Run("passthrough", func(t *testing.T) {
test(t, testPassthroughBackend())
})
t.Run("passthrough-leased", func(t *testing.T) {
test(t, testPassthroughLeasedBackend())
})
}
func testPassthroughBackend() logical.Backend {

View File

@ -1701,20 +1701,15 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
}
coreConfig.ClusterCipherSuites = base.ClusterCipherSuites
coreConfig.DisableCache = base.DisableCache
coreConfig.DevToken = base.DevToken
coreConfig.RecoveryMode = base.RecoveryMode
coreConfig.ActivityLogConfig = base.ActivityLogConfig
coreConfig.EnableResponseHeaderHostname = base.EnableResponseHeaderHostname
coreConfig.EnableResponseHeaderRaftNodeID = base.EnableResponseHeaderRaftNodeID
coreConfig.RollbackPeriod = base.RollbackPeriod
coreConfig.PendingRemovalMountsAllowed = base.PendingRemovalMountsAllowed
coreConfig.ExpirationRevokeRetryBase = base.ExpirationRevokeRetryBase
testApplyEntBaseConfig(coreConfig, base)
}
if coreConfig.ClusterName == "" {