commit 3e55e79a3f

* k8s doc: update for 0.9.1 and 0.8.0 releases (#10825)
* k8s doc: update for 0.9.1 and 0.8.0 releases
* Update website/content/docs/platform/k8s/helm/configuration.mdx
* Autopilot initial commit
* Move autopilot related backend implementations to its own file
* Abstract promoter creation
* Add nil check for health
* Add server state oss no-ops
* Config ext stub for oss
* Make way for non-voters
* s/health/state
* s/ReadReplica/NonVoter
* Add synopsis and description
* Remove struct tags from AutopilotConfig
* Use var for config storage path
* Handle nil config when reading
* Enable testing autopilot by using inmem cluster
* First passing test
* Only report the server as known if it is present in raft config
* Autopilot defaults to on for all existing and new clusters
* Add locking to some functions
* Persist initial config
* Clarify the command usage doc
* Add health metric for each node
* Fix audit logging issue
* Don't set DisablePerformanceStandby to true in test
* Use node id label for health metric
* Log updates to autopilot config
* Less aggressively consume config loading failures
* Return a mutable config
* Return early from known servers if raft config is unable to be pulled
* Update metrics name
* Reduce log level for potentially noisy log
* Add knob to disable autopilot
* Don't persist if default config is in use
* Autopilot: Dead server cleanup (#10857)
* Dead server cleanup
* Initialize channel in any case
* Fix a bunch of tests
* Fix panic
* Add follower locking in heartbeat tracker
* Add LastContactFailureThreshold to config
* Add log when marking node as dead
* Update follower state locking in heartbeat tracker
* Avoid follower states being nil
* Pull test to its own file
* Add execution status to state response
* Optionally enable autopilot in some tests
* Updates
* Added API function to fetch autopilot configuration
* Add test for default autopilot configuration
* Configuration tests
* Add State API test
* Update test
* Added TestClusterOptions.PhysicalFactoryConfig
* Update locking
* Adjust locking in heartbeat tracker
* s/last_contact_failure_threshold/left_server_last_contact_threshold
* Add disabling autopilot as a core config option
* Disable autopilot in some tests
* s/left_server_last_contact_threshold/dead_server_last_contact_threshold
* Set the last heartbeat of followers to now when setting up the active node
* Don't use config defaults from CLI command
* Remove config file support
* Remove HCL test as well
* Persist only the supplied config; merge the supplied config with the default to operate
* Use pointers to structs for storing follower information
* Test update
* Retrieve non-voter status from the config bucket and set it up when a node comes up
* Manage desired suffrage
* Consider bucket being created already
* Move desired suffrage to its own entry
* s/DesiredSuffrageKey/LocalNodeConfigKey
* s/witnessSuffrage/recordSuffrage
* Fix test compilation
* Handle local node config post a snapshot install
* Commit to storage first; then record suffrage in fsm
* No need to handle the local node config being nil case post snapshot restore
* Reconcile autopilot config when a new leader takes over duty
* Grab fsm lock when recording suffrage
* s/Suffrage/DesiredSuffrage in FollowerState
* Instantiate autopilot only on the leader
* Default to old ways in more scenarios
* Make API gracefully handle 404
* Address some feedback
* Make IsDead an atomic.Value
* Simplify follower heartbeat tracking
* Use uber.atomic
* Don't have multiple causes for having autopilot disabled
* Don't remove node from follower states if we fail to remove the dead server
* Autopilot server removals map (#11019)
* Don't remove node from follower states if we fail to remove the dead server
* Use map to track dead server removals
* Use lock and map
* Use delegate lock
* Adjust when to remove entry from map
* Only hold the lock while accessing map
* Fix race
* Don't set default min_quorum
* Fix test
* Ensure follower states is not nil before starting autopilot
* Fix race

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
Co-authored-by: Theron Voran <tvoran@users.noreply.github.com>
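The message above calls out an API function for fetching the autopilot configuration, plus a state endpoint that reports execution status. A minimal read-only sketch against a running cluster, assuming the RaftAutopilotConfiguration and RaftAutopilotState helpers on the Go client's Sys endpoint — treat the helper and field names here as assumptions inferred from the commit message; the underlying paths are sys/storage/raft/autopilot/configuration and sys/storage/raft/autopilot/state:

package main

import (
	"fmt"
	"log"

	vault "github.com/hashicorp/vault/api"
)

func main() {
	// DefaultConfig reads VAULT_ADDR (and the client picks up VAULT_TOKEN)
	// from the environment.
	client, err := vault.NewClient(vault.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// GET sys/storage/raft/autopilot/configuration (assumed helper name).
	config, err := client.Sys().RaftAutopilotConfiguration()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("dead server cleanup: %v, min quorum: %d\n",
		config.CleanupDeadServers, config.MinQuorum)

	// GET sys/storage/raft/autopilot/state (assumed helper name).
	state, err := client.Sys().RaftAutopilotState()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("healthy: %v, voters: %v\n", state.Healthy, state.Voters)
}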
package raft

import (
	"context"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"sort"
	"testing"

	"github.com/go-test/deep"
	proto "github.com/golang/protobuf/proto"
	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/vault/sdk/physical"
)

// getFSM creates an FSM backed by a fresh temporary raft directory and a
// trace-level logger. The caller is responsible for removing the returned
// directory when done.
func getFSM(t testing.TB) (*FSM, string) {
	raftDir, err := ioutil.TempDir("", "vault-raft-")
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("raft dir: %s", raftDir)

	logger := hclog.New(&hclog.LoggerOptions{
		Name:  "raft",
		Level: hclog.Trace,
	})

	fsm, err := NewFSM(raftDir, "", logger)
	if err != nil {
		t.Fatal(err)
	}

	return fsm, raftDir
}

// TestFSM_Batching applies a series of randomly sized batches, mixing command
// entries that carry put operations with occasional configuration-change
// entries, then verifies the stored keys and the FSM's latest index, term,
// and configuration.
func TestFSM_Batching(t *testing.T) {
	fsm, dir := getFSM(t)
	defer os.RemoveAll(dir)

	var index uint64
	var term uint64 = 1

	// getLog returns a log entry for the given index: roughly 20% of the
	// time a configuration change (bumping the term), otherwise a command
	// entry with up to nine put operations. It also returns how many keys
	// the entry writes.
	getLog := func(i uint64) (int, *raft.Log) {
		if rand.Intn(10) >= 8 {
			term++
			return 0, &raft.Log{
				Index: i,
				Term:  term,
				Type:  raft.LogConfiguration,
				Data: raft.EncodeConfiguration(raft.Configuration{
					Servers: []raft.Server{
						raft.Server{
							Address: raft.ServerAddress("test"),
							ID:      raft.ServerID("test"),
						},
					},
				}),
			}
		}

		command := &LogData{
			Operations: make([]*LogOperation, rand.Intn(10)),
		}

		for j := range command.Operations {
			command.Operations[j] = &LogOperation{
				OpType: putOp,
				Key:    fmt.Sprintf("key-%d-%d", i, j),
				Value:  []byte(fmt.Sprintf("value-%d-%d", i, j)),
			}
		}
		commandBytes, err := proto.Marshal(command)
		if err != nil {
			t.Fatal(err)
		}
		return len(command.Operations), &raft.Log{
			Index: i,
			Term:  term,
			Type:  raft.LogCommand,
			Data:  commandBytes,
		}
	}

	totalKeys := 0
	for i := 0; i < 100; i++ {
		batchSize := rand.Intn(64)
		batch := make([]*raft.Log, batchSize)
		for j := 0; j < batchSize; j++ {
			var keys int
			index++
			keys, batch[j] = getLog(index)
			totalKeys += keys
		}

		// Every log in the batch must produce a response of the right type.
		resp := fsm.ApplyBatch(batch)
		if len(resp) != batchSize {
			t.Fatalf("incorrect response length: got %d expected %d", len(resp), batchSize)
		}

		for _, r := range resp {
			if _, ok := r.(*FSMApplyResponse); !ok {
				t.Fatal("bad response type")
			}
		}
	}

	keys, err := fsm.List(context.Background(), "")
	if err != nil {
		t.Fatal(err)
	}

	if len(keys) != totalKeys {
		t.Fatalf("incorrect number of keys: got %d expected %d", len(keys), totalKeys)
	}

	latestIndex, latestConfig := fsm.LatestState()
	if latestIndex.Index != index {
		t.Fatalf("bad latest index: got %d expected %d", latestIndex.Index, index)
	}
	if latestIndex.Term != term {
		t.Fatalf("bad latest term: got %d expected %d", latestIndex.Term, term)
	}

	// A configuration entry was applied whenever the term advanced past 1,
	// so the latest config must have been recorded.
	if latestConfig == nil && term > 1 {
		t.Fatal("config wasn't updated")
	}
}

// TestFSM_List stores entries under "foo/<n>/..." keys in random order and
// verifies that listing the "foo/" prefix yields every "<n>/" sub-prefix.
func TestFSM_List(t *testing.T) {
	fsm, dir := getFSM(t)
	defer os.RemoveAll(dir)

	ctx := context.Background()
	count := 100
	keys := rand.Perm(count)
	var sorted []string
	for _, k := range keys {
		err := fsm.Put(ctx, &physical.Entry{Key: fmt.Sprintf("foo/%d/bar", k)})
		if err != nil {
			t.Fatal(err)
		}
		err = fsm.Put(ctx, &physical.Entry{Key: fmt.Sprintf("foo/%d/baz", k)})
		if err != nil {
			t.Fatal(err)
		}
		sorted = append(sorted, fmt.Sprintf("%d/", k))
	}
	sort.Strings(sorted)

	got, err := fsm.List(ctx, "foo/")
	if err != nil {
		t.Fatal(err)
	}
	sort.Strings(got)
	if diff := deep.Equal(sorted, got); len(diff) > 0 {
		t.Fatal(diff)
	}
}
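
For reference, a put/get round trip in the same style as the tests above — an illustrative sketch, not part of the original file, assuming FSM satisfies the sdk/physical Backend contract for Put and Get as the Put and List calls above suggest:

// TestFSM_PutGet is a hypothetical round-trip check: it writes one entry and
// reads it back, asserting the value survives the trip through the FSM.
func TestFSM_PutGet(t *testing.T) {
	fsm, dir := getFSM(t)
	defer os.RemoveAll(dir)

	ctx := context.Background()
	entry := &physical.Entry{Key: "foo/bar", Value: []byte("baz")}
	if err := fsm.Put(ctx, entry); err != nil {
		t.Fatal(err)
	}

	got, err := fsm.Get(ctx, "foo/bar")
	if err != nil {
		t.Fatal(err)
	}
	if got == nil || string(got.Value) != "baz" {
		t.Fatalf("unexpected entry: %+v", got)
	}
}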