add core state lock deadlock detection config option v2 (#18604)
* add core state lock deadlock detection config option v2
* add changelog
* split out NewTestCluster function to maintain build flag
* replace long func with constant
* remove line
* rename file, and move where detect deadlock flag is set
This commit is contained in:
parent 94a4f7d3c9
commit 6f7757e949
@@ -0,0 +1,3 @@
+```release-note:improvement
+core: add `detect_deadlocks` config to optionally detect core state deadlocks
+```
@@ -2619,6 +2619,7 @@ func createCoreConfig(c *ServerCommand, config *server.Config, backend physical.
 		CredentialBackends:   c.CredentialBackends,
 		LogicalBackends:      c.LogicalBackends,
 		Logger:               c.logger,
+		DetectDeadlocks:      config.DetectDeadlocks,
 		DisableSentinelTrace: config.DisableSentinelTrace,
 		DisableCache:         config.DisableCache,
 		DisableMlock:         config.DisableMlock,
@@ -97,6 +97,8 @@ type Config struct {
 	LogRequestsLevel    string      `hcl:"-"`
 	LogRequestsLevelRaw interface{} `hcl:"log_requests_level"`
 
+	DetectDeadlocks string `hcl:"detect_deadlocks"`
+
 	EnableResponseHeaderRaftNodeID    bool        `hcl:"-"`
 	EnableResponseHeaderRaftNodeIDRaw interface{} `hcl:"enable_response_header_raft_node_id"`
@@ -389,6 +391,11 @@ func (c *Config) Merge(c2 *Config) *Config {
 		result.LogRequestsLevel = c2.LogRequestsLevel
 	}
 
+	result.DetectDeadlocks = c.DetectDeadlocks
+	if c2.DetectDeadlocks != "" {
+		result.DetectDeadlocks = c2.DetectDeadlocks
+	}
+
 	result.EnableResponseHeaderRaftNodeID = c.EnableResponseHeaderRaftNodeID
 	if c2.EnableResponseHeaderRaftNodeID {
 		result.EnableResponseHeaderRaftNodeID = c2.EnableResponseHeaderRaftNodeID
@@ -1025,6 +1032,8 @@ func (c *Config) Sanitized() map[string]interface{} {
 		"enable_response_header_raft_node_id": c.EnableResponseHeaderRaftNodeID,
 
 		"log_requests_level": c.LogRequestsLevel,
+
+		"detect_deadlocks": c.DetectDeadlocks,
 	}
 	for k, v := range sharedResult {
 		result[k] = v
@@ -745,6 +745,7 @@ func testConfig_Sanitized(t *testing.T) {
 		"raw_storage_endpoint":   true,
 		"introspection_endpoint": false,
 		"disable_sentinel_trace": true,
+		"detect_deadlocks":       "",
 		"enable_ui":              true,
 		"enable_response_header_hostname":     false,
 		"enable_response_header_raft_node_id": false,
@@ -1,21 +0,0 @@
-//go:build deadlock
-
-package locking
-
-import (
-	"github.com/sasha-s/go-deadlock"
-)
-
-// DeadlockMutex, when the build tag `deadlock` is present, behaves like a
-// sync.Mutex but does periodic checking to see if outstanding locks and requests
-// look like a deadlock. If it finds a deadlock candidate it will output it
-// prefixed with "POTENTIAL DEADLOCK", as described at
-// https://github.com/sasha-s/go-deadlock
-type DeadlockMutex struct {
-	deadlock.Mutex
-}
-
-// DeadlockRWMutex is the RW version of DeadlockMutex.
-type DeadlockRWMutex struct {
-	deadlock.RWMutex
-}
@@ -1,19 +1,46 @@
-//go:build !deadlock
-
 package locking
 
 import (
 	"sync"
+
+	"github.com/sasha-s/go-deadlock"
 )
 
-// DeadlockMutex is just a sync.Mutex when the build tag `deadlock` is absent.
-// See its other definition in the corresponding deadlock-build-tag-constrained
-// file for more details.
+// Common mutex interface to allow either built-in or imported deadlock use
+type Mutex interface {
+	Lock()
+	Unlock()
+}
+
+// Common r/w mutex interface to allow either built-in or imported deadlock use
+type RWMutex interface {
+	Lock()
+	RLock()
+	RLocker() sync.Locker
+	RUnlock()
+	Unlock()
+}
+
+// DeadlockMutex (used when requested via config option `detect_deadlocks`)
+// behaves like a sync.Mutex but does periodic checking to see if outstanding
+// locks and requests look like a deadlock. If it finds a deadlock candidate it
+// will output it prefixed with "POTENTIAL DEADLOCK", as described at
+// https://github.com/sasha-s/go-deadlock
 type DeadlockMutex struct {
-	sync.Mutex
+	deadlock.Mutex
 }
 
 // DeadlockRWMutex is the RW version of DeadlockMutex.
 type DeadlockRWMutex struct {
-	sync.RWMutex
+	deadlock.RWMutex
 }
+
+// Regular sync/mutex.
+type SyncMutex struct {
+	sync.Mutex
+}
+
+// SyncRWMutex is the RW version of SyncMutex.
+type SyncRWMutex struct {
+	sync.RWMutex
+}
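The interfaces above are what let the rest of the codebase hold a lock without knowing which implementation backs it: both the stdlib and go-deadlock types satisfy `RWMutex`. Below is a minimal, self-contained sketch of that pattern; the `pickLock` helper and its boolean flag are illustrative only, not part of this change.

```go
package main

import (
	"fmt"
	"sync"

	"github.com/sasha-s/go-deadlock"
)

// RWMutex mirrors the interface introduced above; sync.RWMutex and
// deadlock.RWMutex both satisfy it, so callers can hold either one.
type RWMutex interface {
	Lock()
	RLock()
	RLocker() sync.Locker
	RUnlock()
	Unlock()
}

// pickLock is a hypothetical helper showing the selection pattern.
func pickLock(detect bool) RWMutex {
	if detect {
		// Tracks lock ordering and long waits, at some runtime cost.
		return &deadlock.RWMutex{}
	}
	// Plain stdlib lock with no extra overhead.
	return &sync.RWMutex{}
}

func main() {
	lock := pickLock(true)
	lock.Lock()
	fmt.Println("holding the configured lock")
	lock.Unlock()
}
```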
@@ -39,6 +39,7 @@ func TestSysConfigState_Sanitized(t *testing.T) {
 		"disable_printable_check": false,
 		"disable_sealwrap":        false,
 		"raw_storage_endpoint":    false,
+		"detect_deadlocks":        "",
 		"introspection_endpoint":  false,
 		"disable_sentinel_trace":  false,
 		"enable_ui":               false,
@@ -316,7 +316,7 @@ type Core struct {
 	auditBackends map[string]audit.Factory
 
 	// stateLock protects mutable state
-	stateLock locking.DeadlockRWMutex
+	stateLock locking.RWMutex
 	sealed    *uint32
 
 	standby bool
@@ -732,6 +732,9 @@ type CoreConfig struct {
 
 	Logger log.Logger
 
+	// Use the deadlocks library to detect deadlocks
+	DetectDeadlocks string
+
 	// Disables the trace display for Sentinel checks
 	DisableSentinelTrace bool
@@ -904,6 +907,14 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
 		conf.NumExpirationWorkers = numExpirationWorkersDefault
 	}
 
+	// Use the imported deadlock-detecting lock if requested
+	var stateLock locking.RWMutex
+	if strings.Contains(conf.DetectDeadlocks, "statelock") {
+		stateLock = &locking.DeadlockRWMutex{}
+	} else {
+		stateLock = &locking.SyncRWMutex{}
+	}
+
 	effectiveSDKVersion := conf.EffectiveSDKVersion
 	if effectiveSDKVersion == "" {
 		effectiveSDKVersion = version.GetVersion().Version
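Note the use of `strings.Contains` rather than an equality check: presumably this leaves room for `detect_deadlocks` to carry a list of lock names as detection is extended to other locks, with each subsystem testing for its own token. A tiny sketch of that reading (the `expiration` and `barrier` tokens are hypothetical; only `statelock` is supported in this change):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical multi-lock value for detect_deadlocks.
	detectDeadlocks := "statelock,expiration"

	// Each subsystem checks for its own token, as CreateCore does above.
	fmt.Println(strings.Contains(detectDeadlocks, "statelock"))  // true
	fmt.Println(strings.Contains(detectDeadlocks, "expiration")) // true
	fmt.Println(strings.Contains(detectDeadlocks, "barrier"))    // false
}
```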
@@ -922,6 +933,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
 		clusterListener:      new(atomic.Value),
 		customListenerHeader: new(atomic.Value),
 		seal:                 conf.Seal,
+		stateLock:            stateLock,
 		router:               NewRouter(),
 		sealed:               new(uint32),
 		sealMigrationDone:    new(uint32),
@@ -6,6 +6,7 @@ import (
 	"reflect"
 	"strings"
+	"sync"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -21,6 +22,7 @@ import (
 	"github.com/hashicorp/vault/sdk/physical"
 	"github.com/hashicorp/vault/sdk/physical/inmem"
 	"github.com/hashicorp/vault/version"
+	"github.com/sasha-s/go-deadlock"
 )
 
 // invalidKey is used to test Unseal
@@ -2836,3 +2838,51 @@ func TestCore_ServiceRegistration(t *testing.T) {
 		t.Fatal(diff)
 	}
 }
+
+func TestDetectedDeadlock(t *testing.T) {
+	testCore, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{DetectDeadlocks: "statelock"})
+	InduceDeadlock(t, testCore, 1)
+}
+
+func TestDefaultDeadlock(t *testing.T) {
+	testCore, _, _ := TestCoreUnsealed(t)
+	InduceDeadlock(t, testCore, 0)
+}
+
+func RestoreDeadlockOpts() func() {
+	opts := deadlock.Opts
+	return func() {
+		deadlock.Opts = opts
+	}
+}
+
+func InduceDeadlock(t *testing.T, vaultcore *Core, expected uint32) {
+	defer RestoreDeadlockOpts()()
+	var deadlocks uint32
+	deadlock.Opts.OnPotentialDeadlock = func() {
+		atomic.AddUint32(&deadlocks, 1)
+	}
+	var mtx deadlock.Mutex
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		vaultcore.expiration.coreStateLock.Lock()
+		mtx.Lock()
+		mtx.Unlock()
+		vaultcore.expiration.coreStateLock.Unlock()
+	}()
+	wg.Wait()
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		mtx.Lock()
+		vaultcore.expiration.coreStateLock.RLock()
+		vaultcore.expiration.coreStateLock.RUnlock()
+		mtx.Unlock()
+	}()
+	wg.Wait()
+	if atomic.LoadUint32(&deadlocks) != expected {
+		t.Fatalf("expected %d deadlocks, detected %d", expected, deadlocks)
+	}
+}
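The test works because go-deadlock flags lock-order inversions, not only goroutines that are actually blocked: taking the core state lock before `mtx` in one goroutine and `mtx` before the core state lock in another is enough to fire `OnPotentialDeadlock`, even though the two goroutines run one after the other. A self-contained sketch of the same mechanism outside Vault, using the same public `deadlock.Opts` hook the test relies on:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/sasha-s/go-deadlock"
)

func main() {
	var detected uint32
	// Count potential deadlocks instead of exiting (the library default).
	deadlock.Opts.OnPotentialDeadlock = func() {
		atomic.AddUint32(&detected, 1)
	}

	var a, b deadlock.Mutex
	var wg sync.WaitGroup

	// First goroutine establishes the lock order a -> b.
	wg.Add(1)
	go func() {
		defer wg.Done()
		a.Lock()
		b.Lock()
		b.Unlock()
		a.Unlock()
	}()
	wg.Wait()

	// Second goroutine uses the inverted order b -> a, which go-deadlock
	// reports as a potential deadlock even though nothing actually blocks.
	wg.Add(1)
	go func() {
		defer wg.Done()
		b.Lock()
		a.Lock()
		a.Unlock()
		b.Unlock()
	}()
	wg.Wait()

	fmt.Printf("potential deadlocks detected: %d\n", atomic.LoadUint32(&detected))
}
```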
@@ -139,7 +139,7 @@ type ExpirationManager struct {
 	quitCh chan struct{}
 
 	// do not hold coreStateLock in any API handler code - it is already held
-	coreStateLock     *locking.DeadlockRWMutex
+	coreStateLock     locking.RWMutex
 	quitContext       context.Context
 	leaseCheckCounter *uint32
@@ -350,7 +350,7 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
 		restoreLocks: locksutil.CreateLocks(),
 		quitCh:       make(chan struct{}),
 
-		coreStateLock:     &c.stateLock,
+		coreStateLock:     c.stateLock,
 		quitContext:       c.activeContext,
 		leaseCheckCounter: new(uint32),
@@ -0,0 +1,5 @@
+//go:build deadlock
+
+package vault
+
+const TestDeadlockDetection = "statelock"
@@ -0,0 +1,5 @@
+//go:build !deadlock
+
+package vault
+
+const TestDeadlockDetection = ""
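These two files give `TestDeadlockDetection` opposite values under the `deadlock` build tag, so test clusters opt into detection at compile time rather than per-test: building the test binary with something like `go test -tags deadlock ./vault/...` should make `NewTestCluster` pass `statelock` through to every core, while ordinary builds keep the empty string and the plain sync locks.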
@@ -212,6 +212,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core {
 	conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname
 	conf.DisableSSCTokens = opts.DisableSSCTokens
 	conf.PluginDirectory = opts.PluginDirectory
+	conf.DetectDeadlocks = opts.DetectDeadlocks
 
 	if opts.Logger != nil {
 		conf.Logger = opts.Logger
@@ -1382,6 +1383,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
 	var baseAddr *net.TCPAddr
 	if opts != nil && opts.BaseListenAddress != "" {
 		baseAddr, err = net.ResolveTCPAddr("tcp", opts.BaseListenAddress)
 		if err != nil {
 			t.Fatal("could not parse given base IP")
 		}
@@ -1633,6 +1635,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
 	}
 
 	if base != nil {
+		coreConfig.DetectDeadlocks = TestDeadlockDetection
 		coreConfig.RawConfig = base.RawConfig
 		coreConfig.DisableCache = base.DisableCache
 		coreConfig.EnableUI = base.EnableUI
@@ -149,6 +149,11 @@ to specify where the configuration is.
   maximum request duration allowed before Vault cancels the request. This can
   be overridden per listener via the `max_request_duration` value.
 
+- `detect_deadlocks` `(string: "")` - Specifies the internal mutex locks that should be monitored for
+  potential deadlocks. The currently supported value is `statelock`, which will cause "POTENTIAL DEADLOCK:"
+  to be logged when an attempt at a core state lock appears to be deadlocked. Enabling this can have
+  a negative effect on performance due to the tracking of each lock attempt.
+
 - `raw_storage_endpoint` `(bool: false)` – Enables the `sys/raw` endpoint which
   allows the decryption/encryption of raw data into and out of the security
   barrier. This is a highly privileged endpoint.
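For reference, enabling the option is a one-line addition at the top level of the server's HCL configuration, e.g. `detect_deadlocks = "statelock"`; given the performance warning above, it is presumably best reserved for debugging suspected lock-ups rather than steady-state production use.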