add core state lock deadlock detection config option v2 (#18604)

* add core state lock deadlock detection config option v2

* add changelog

* split out NewTestCluster function to maintain build flag

* replace long func with constant

* remove line

* rename file, and move where detect deadlock flag is set
Ellie 2023-01-11 13:32:05 -06:00 committed by GitHub
parent 94a4f7d3c9
commit 6f7757e949
14 changed files with 131 additions and 30 deletions

changelog/18604.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
core: add `detect_deadlocks` config to optionally detect core state deadlocks
```

View File

@@ -2619,6 +2619,7 @@ func createCoreConfig(c *ServerCommand, config *server.Config, backend physical.
CredentialBackends: c.CredentialBackends,
LogicalBackends: c.LogicalBackends,
Logger: c.logger,
DetectDeadlocks: config.DetectDeadlocks,
DisableSentinelTrace: config.DisableSentinelTrace,
DisableCache: config.DisableCache,
DisableMlock: config.DisableMlock,

View File

@@ -97,6 +97,8 @@ type Config struct {
LogRequestsLevel string `hcl:"-"`
LogRequestsLevelRaw interface{} `hcl:"log_requests_level"`
DetectDeadlocks string `hcl:"detect_deadlocks"`
EnableResponseHeaderRaftNodeID bool `hcl:"-"`
EnableResponseHeaderRaftNodeIDRaw interface{} `hcl:"enable_response_header_raft_node_id"`
@@ -389,6 +391,11 @@ func (c *Config) Merge(c2 *Config) *Config {
result.LogRequestsLevel = c2.LogRequestsLevel
}
result.DetectDeadlocks = c.DetectDeadlocks
if c2.DetectDeadlocks != "" {
result.DetectDeadlocks = c2.DetectDeadlocks
}
result.EnableResponseHeaderRaftNodeID = c.EnableResponseHeaderRaftNodeID
if c2.EnableResponseHeaderRaftNodeID {
result.EnableResponseHeaderRaftNodeID = c2.EnableResponseHeaderRaftNodeID
@@ -1025,6 +1032,8 @@ func (c *Config) Sanitized() map[string]interface{} {
"enable_response_header_raft_node_id": c.EnableResponseHeaderRaftNodeID,
"log_requests_level": c.LogRequestsLevel,
"detect_deadlocks": c.DetectDeadlocks,
}
for k, v := range sharedResult {
result[k] = v

View File

@@ -745,6 +745,7 @@ func testConfig_Sanitized(t *testing.T) {
"raw_storage_endpoint": true,
"introspection_endpoint": false,
"disable_sentinel_trace": true,
"detect_deadlocks": "",
"enable_ui": true,
"enable_response_header_hostname": false,
"enable_response_header_raft_node_id": false,

View File

@@ -1,21 +0,0 @@
//go:build deadlock
package locking
import (
"github.com/sasha-s/go-deadlock"
)
// DeadlockMutex, when the build tag `deadlock` is present, behaves like a
// sync.Mutex but does periodic checking to see if outstanding locks and requests
// look like a deadlock. If it finds a deadlock candidate it will output it
// prefixed with "POTENTIAL DEADLOCK", as described at
// https://github.com/sasha-s/go-deadlock
type DeadlockMutex struct {
deadlock.Mutex
}
// DeadlockRWMutex is the RW version of DeadlockMutex.
type DeadlockRWMutex struct {
deadlock.RWMutex
}

View File

@@ -1,19 +1,46 @@
//go:build !deadlock
package locking
import (
"sync"
"github.com/sasha-s/go-deadlock"
)
-// DeadlockMutex is just a sync.Mutex when the build tag `deadlock` is absent.
-// See its other definition in the corresponding deadlock-build-tag-constrained
-// file for more details.
// Mutex is the common mutex interface, allowing either the built-in
// sync.Mutex or the imported deadlock-detecting mutex to be used.
type Mutex interface {
Lock()
Unlock()
}
// RWMutex is the common read/write mutex interface, allowing either the
// built-in sync.RWMutex or the imported deadlock-detecting mutex to be used.
type RWMutex interface {
Lock()
RLock()
RLocker() sync.Locker
RUnlock()
Unlock()
}
// DeadlockMutex, used when requested via the config option `detect_deadlocks`,
// behaves like a sync.Mutex but periodically checks whether outstanding locks
// and requests look like a deadlock. If it finds a deadlock candidate, it
// outputs it prefixed with "POTENTIAL DEADLOCK", as described at
// https://github.com/sasha-s/go-deadlock
type DeadlockMutex struct {
-sync.Mutex
+deadlock.Mutex
}
// DeadlockRWMutex is the RW version of DeadlockMutex.
type DeadlockRWMutex struct {
deadlock.RWMutex
}
// SyncMutex is a regular sync.Mutex.
type SyncMutex struct {
sync.Mutex
}
// SyncRWMutex is the RW version of SyncMutex.
type SyncRWMutex struct {
sync.RWMutex
}
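The interface split above is what lets consumers stay agnostic about which lock they hold. Below is a minimal sketch of the pattern; `registry` and `newRegistry` are illustrative names, not part of this change, while the `locking` API is the one defined here:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/hashicorp/vault/helper/locking"
)

// registry declares its lock as the locking.RWMutex interface, so the same
// code runs unchanged with either implementation.
type registry struct {
	lock  locking.RWMutex
	items map[string]string
}

// newRegistry chooses the concrete lock once at construction time, mirroring
// the selection CreateCore performs from the detect_deadlocks config string.
func newRegistry(detectDeadlocks string) *registry {
	var lock locking.RWMutex = &locking.SyncRWMutex{}
	if strings.Contains(detectDeadlocks, "statelock") {
		lock = &locking.DeadlockRWMutex{}
	}
	return &registry{lock: lock, items: make(map[string]string)}
}

func (r *registry) get(k string) string {
	r.lock.RLock()
	defer r.lock.RUnlock()
	return r.items[k]
}

func main() {
	r := newRegistry("statelock")
	fmt.Println(r.get("example")) // prints an empty string; no deadlock here
}
```

Choosing the concrete type once at construction keeps the detection overhead strictly opt-in, which is the same trade-off CreateCore makes below.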

View File

@@ -39,6 +39,7 @@ func TestSysConfigState_Sanitized(t *testing.T) {
"disable_printable_check": false,
"disable_sealwrap": false,
"raw_storage_endpoint": false,
"detect_deadlocks": "",
"introspection_endpoint": false,
"disable_sentinel_trace": false,
"enable_ui": false,

View File

@@ -316,7 +316,7 @@ type Core struct {
auditBackends map[string]audit.Factory
// stateLock protects mutable state
-stateLock locking.DeadlockRWMutex
+stateLock locking.RWMutex
sealed *uint32
standby bool
@@ -732,6 +732,9 @@ type CoreConfig struct {
Logger log.Logger
// Use the deadlocks library to detect deadlocks
DetectDeadlocks string
// Disables the trace display for Sentinel checks
DisableSentinelTrace bool
@@ -904,6 +907,14 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
conf.NumExpirationWorkers = numExpirationWorkersDefault
}
// Use the imported deadlock-detection library for the state lock if requested
var stateLock locking.RWMutex
if strings.Contains(conf.DetectDeadlocks, "statelock") {
stateLock = &locking.DeadlockRWMutex{}
} else {
stateLock = &locking.SyncRWMutex{}
}
effectiveSDKVersion := conf.EffectiveSDKVersion
if effectiveSDKVersion == "" {
effectiveSDKVersion = version.GetVersion().Version
@@ -922,6 +933,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
clusterListener: new(atomic.Value),
customListenerHeader: new(atomic.Value),
seal: conf.Seal,
stateLock: stateLock,
router: NewRouter(),
sealed: new(uint32),
sealMigrationDone: new(uint32),

View File

@@ -6,6 +6,7 @@ import (
"reflect"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
@@ -21,6 +22,7 @@ import (
"github.com/hashicorp/vault/sdk/physical"
"github.com/hashicorp/vault/sdk/physical/inmem"
"github.com/hashicorp/vault/version"
"github.com/sasha-s/go-deadlock"
)
// invalidKey is used to test Unseal
@@ -2836,3 +2838,51 @@ func TestCore_ServiceRegistration(t *testing.T) {
t.Fatal(diff)
}
}
func TestDetectedDeadlock(t *testing.T) {
testCore, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{DetectDeadlocks: "statelock"})
InduceDeadlock(t, testCore, 1)
}
func TestDefaultDeadlock(t *testing.T) {
testCore, _, _ := TestCoreUnsealed(t)
InduceDeadlock(t, testCore, 0)
}
func RestoreDeadlockOpts() func() {
opts := deadlock.Opts
return func() {
deadlock.Opts = opts
}
}
func InduceDeadlock(t *testing.T, vaultcore *Core, expected uint32) {
defer RestoreDeadlockOpts()()
var deadlocks uint32
deadlock.Opts.OnPotentialDeadlock = func() {
atomic.AddUint32(&deadlocks, 1)
}
var mtx deadlock.Mutex
var wg sync.WaitGroup
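// First goroutine: take coreStateLock and then mtx, establishing that lock ordering.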
wg.Add(1)
go func() {
defer wg.Done()
vaultcore.expiration.coreStateLock.Lock()
mtx.Lock()
mtx.Unlock()
vaultcore.expiration.coreStateLock.Unlock()
}()
wg.Wait()
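// Second goroutine: acquire in the reverse order (mtx, then coreStateLock).
// go-deadlock reports this ordering inversion as a potential deadlock when detection is enabled.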
wg.Add(1)
go func() {
defer wg.Done()
mtx.Lock()
vaultcore.expiration.coreStateLock.RLock()
vaultcore.expiration.coreStateLock.RUnlock()
mtx.Unlock()
}()
wg.Wait()
if atomic.LoadUint32(&deadlocks) != expected {
t.Fatalf("expected 1 deadlock, detected %d", deadlocks)
}
}

View File

@@ -139,7 +139,7 @@ type ExpirationManager struct {
quitCh chan struct{}
// do not hold coreStateLock in any API handler code - it is already held
-coreStateLock *locking.DeadlockRWMutex
+coreStateLock locking.RWMutex
quitContext context.Context
leaseCheckCounter *uint32
@@ -350,7 +350,7 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
restoreLocks: locksutil.CreateLocks(),
quitCh: make(chan struct{}),
-coreStateLock: &c.stateLock,
+coreStateLock: c.stateLock,
quitContext: c.activeContext,
leaseCheckCounter: new(uint32),

View File

@@ -0,0 +1,5 @@
//go:build deadlock
package vault
const TestDeadlockDetection = "statelock"

View File

@@ -0,0 +1,5 @@
//go:build !deadlock
package vault
const TestDeadlockDetection = ""

View File

@@ -212,6 +212,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core {
conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname
conf.DisableSSCTokens = opts.DisableSSCTokens
conf.PluginDirectory = opts.PluginDirectory
conf.DetectDeadlocks = opts.DetectDeadlocks
if opts.Logger != nil {
conf.Logger = opts.Logger
@@ -1382,6 +1383,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
var baseAddr *net.TCPAddr
if opts != nil && opts.BaseListenAddress != "" {
baseAddr, err = net.ResolveTCPAddr("tcp", opts.BaseListenAddress)
if err != nil {
t.Fatal("could not parse given base IP")
}
@@ -1633,6 +1635,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
}
if base != nil {
coreConfig.DetectDeadlocks = TestDeadlockDetection
coreConfig.RawConfig = base.RawConfig
coreConfig.DisableCache = base.DisableCache
coreConfig.EnableUI = base.EnableUI

View File

@@ -149,6 +149,11 @@ to specify where the configuration is.
maximum request duration allowed before Vault cancels the request. This can
be overridden per listener via the `max_request_duration` value.
- `detect_deadlocks` `(string: "")` - Specifies the internal mutex locks that should be monitored for
potential deadlocks. The only currently supported value is `statelock`, which causes "POTENTIAL DEADLOCK:"
to be logged when an attempt to acquire the core state lock appears to be deadlocked. Enabling this can
have a negative effect on performance because every lock attempt is tracked; see the example after this list.
- `raw_storage_endpoint` `(bool: false)` - Enables the `sys/raw` endpoint which
allows the decryption/encryption of raw data into and out of the security
barrier. This is a highly privileged endpoint.
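To see the new option end to end, here is a minimal sketch of a server configuration that enables it; the `storage` and `listener` stanzas are illustrative placeholders, only `detect_deadlocks` comes from this change:

```hcl
# Monitor the core state lock for potential deadlocks. Every lock attempt
# is tracked, so expect some performance overhead.
detect_deadlocks = "statelock"

storage "inmem" {}

listener "tcp" {
  address     = "127.0.0.1:8200"
  tls_disable = true
}
```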