backport of commit d407078766e14c883978d496895aa4a068c87c68 (#22058)

Co-authored-by: Josh Black <raskchanky@gmail.com>
This commit is contained in:
hc-github-team-secure-vault-core 2023-07-25 20:27:13 -04:00 committed by GitHub
parent 96c35b3ab7
commit a08da20b50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 18 additions and 2 deletions

3
changelog/22040.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
storage/raft: Enforce a minimum value of 1m for dead_server_last_contact_threshold.
```

View File

@ -190,6 +190,14 @@ func TestRaft_Autopilot_Configuration(t *testing.T) {
writeConfigFunc(writableConfig, true) writeConfigFunc(writableConfig, true)
configCheckFunc(config) configCheckFunc(config)
// Check dead server last contact threshold minimum
writableConfig = map[string]interface{}{
"cleanup_dead_servers": true,
"dead_server_last_contact_threshold": "5s",
}
writeConfigFunc(writableConfig, true)
configCheckFunc(config)
// Ensure that the configuration stays across reboots // Ensure that the configuration stays across reboots
leaderCore := cluster.Cores[0] leaderCore := cluster.Cores[0]
testhelpers.EnsureCoreSealed(t, cluster.Cores[0]) testhelpers.EnsureCoreSealed(t, cluster.Cores[0])
@ -442,7 +450,7 @@ func TestRaft_Autopilot_DeadServerCleanup(t *testing.T) {
// Ensure Autopilot has the aggressive settings // Ensure Autopilot has the aggressive settings
config.CleanupDeadServers = true config.CleanupDeadServers = true
config.ServerStabilizationTime = 5 * time.Second config.ServerStabilizationTime = 5 * time.Second
config.DeadServerLastContactThreshold = 10 * time.Second config.DeadServerLastContactThreshold = 1 * time.Minute
config.MaxTrailingLogs = 10 config.MaxTrailingLogs = 10
config.LastContactThreshold = 10 * time.Second config.LastContactThreshold = 10 * time.Second
config.MinQuorum = 3 config.MinQuorum = 3

View File

@ -533,6 +533,10 @@ func (b *SystemBackend) handleStorageRaftAutopilotConfigUpdate() framework.Opera
return logical.ErrorResponse(fmt.Sprintf("min_quorum must be set when cleanup_dead_servers is set and it should at least be 3; cleanup_dead_servers: %#v, min_quorum: %#v", effectiveConf.CleanupDeadServers, effectiveConf.MinQuorum)), logical.ErrInvalidRequest return logical.ErrorResponse(fmt.Sprintf("min_quorum must be set when cleanup_dead_servers is set and it should at least be 3; cleanup_dead_servers: %#v, min_quorum: %#v", effectiveConf.CleanupDeadServers, effectiveConf.MinQuorum)), logical.ErrInvalidRequest
} }
if effectiveConf.CleanupDeadServers && effectiveConf.DeadServerLastContactThreshold.Seconds() < 60 {
return logical.ErrorResponse(fmt.Sprintf("dead_server_last_contact_threshold should not be set to less than 1m; received: %v", deadServerLastContactThreshold)), logical.ErrInvalidRequest
}
// Persist only the user supplied fields // Persist only the user supplied fields
if persist { if persist {
entry, err := logical.StorageEntryJSON(raftAutopilotConfigurationStoragePath, config) entry, err := logical.StorageEntryJSON(raftAutopilotConfigurationStoragePath, config)

View File

@ -210,7 +210,8 @@ This endpoint is used to modify the configuration of the autopilot subsystem of
- `dead_server_last_contact_threshold` `(string: "24h")` - Limit on the amount of time - `dead_server_last_contact_threshold` `(string: "24h")` - Limit on the amount of time
a server can go without leader contact before being considered failed. This a server can go without leader contact before being considered failed. This
takes effect only when `cleanup_dead_servers` is `true`. takes effect only when `cleanup_dead_servers` is `true`. This cannot be set to a value
smaller than 1m.
- `max_trailing_logs` `(int: 1000)` - Amount of entries in the Raft Log that a server - `max_trailing_logs` `(int: 1000)` - Amount of entries in the Raft Log that a server
can be behind before being considered unhealthy. can be behind before being considered unhealthy.