open-vault/physical/raft/fsm_test.go

package raft

import (
	"context"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"sort"
	"testing"

	"github.com/go-test/deep"
	"github.com/golang/protobuf/proto"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/vault/sdk/physical"
)
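
// getFSM creates an FSM backed by a temporary raft directory and returns it
// along with the directory path so the caller can clean it up.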
func getFSM(t testing.TB) (*FSM, string) {
	raftDir, err := ioutil.TempDir("", "vault-raft-")
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("raft dir: %s", raftDir)

	logger := hclog.New(&hclog.LoggerOptions{
		Name:  "raft",
		Level: hclog.Trace,
	})

	fsm, err := NewFSM(raftDir, "", logger)
	if err != nil {
		t.Fatal(err)
	}

	return fsm, raftDir
}
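
// TestFSM_Batching applies a series of randomly sized batches containing a mix
// of command and configuration log entries, then verifies the responses, the
// number of stored keys, and the latest index, term, and configuration
// reported by the FSM.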
func TestFSM_Batching(t *testing.T) {
	fsm, dir := getFSM(t)
	defer func() { _ = os.RemoveAll(dir) }()

	var index uint64
	var term uint64 = 1

	// getLog returns either a configuration change entry (roughly 20% of the
	// time) or a command entry carrying a random number of put operations,
	// along with the number of keys that entry will store.
	getLog := func(i uint64) (int, *raft.Log) {
		if rand.Intn(10) >= 8 {
			term += 1
			return 0, &raft.Log{
				Index: i,
				Term:  term,
				Type:  raft.LogConfiguration,
				Data: raft.EncodeConfiguration(raft.Configuration{
					Servers: []raft.Server{
						{
							Address: "test",
							ID:      "test",
						},
					},
				}),
			}
		}

		command := &LogData{
			Operations: make([]*LogOperation, rand.Intn(10)),
		}
		for j := range command.Operations {
			command.Operations[j] = &LogOperation{
				OpType: putOp,
				Key:    fmt.Sprintf("key-%d-%d", i, j),
				Value:  []byte(fmt.Sprintf("value-%d-%d", i, j)),
			}
		}

		commandBytes, err := proto.Marshal(command)
		if err != nil {
			t.Fatal(err)
		}

		return len(command.Operations), &raft.Log{
			Index: i,
			Term:  term,
			Type:  raft.LogCommand,
			Data:  commandBytes,
		}
	}

	totalKeys := 0
	for i := 0; i < 100; i++ {
		batchSize := rand.Intn(64)
		batch := make([]*raft.Log, batchSize)
		for j := 0; j < batchSize; j++ {
			var keys int
			index++
			keys, batch[j] = getLog(index)
			totalKeys += keys
		}

		// Each log in the batch should produce exactly one FSMApplyResponse.
		resp := fsm.ApplyBatch(batch)
		if len(resp) != batchSize {
			t.Fatalf("incorrect response length: got %d expected %d", len(resp), batchSize)
		}
		for _, r := range resp {
			if _, ok := r.(*FSMApplyResponse); !ok {
				t.Fatal("bad response type")
			}
		}
	}

	keys, err := fsm.List(context.Background(), "")
	if err != nil {
		t.Fatal(err)
	}
	if len(keys) != totalKeys {
		t.Fatalf("incorrect number of keys: got %d expected %d", len(keys), totalKeys)
	}

	latestIndex, latestConfig := fsm.LatestState()
	if latestIndex.Index != index {
		t.Fatalf("bad latest index: got %d expected %d", latestIndex.Index, index)
	}
	if latestIndex.Term != term {
		t.Fatalf("bad latest term: got %d expected %d", latestIndex.Term, term)
	}
	if latestConfig == nil && term > 1 {
		t.Fatal("config wasn't updated")
	}
}
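
// TestFSM_List writes entries under "foo/<n>/..." in random order and checks
// that List("foo/") returns the expected set of child prefixes.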
func TestFSM_List(t *testing.T) {
	fsm, dir := getFSM(t)
	defer func() { _ = os.RemoveAll(dir) }()

	ctx := context.Background()
	count := 100
	keys := rand.Perm(count)
	var sorted []string
	for _, k := range keys {
		err := fsm.Put(ctx, &physical.Entry{Key: fmt.Sprintf("foo/%d/bar", k)})
		if err != nil {
			t.Fatal(err)
		}
		err = fsm.Put(ctx, &physical.Entry{Key: fmt.Sprintf("foo/%d/baz", k)})
		if err != nil {
			t.Fatal(err)
		}
		// List("foo/") is expected to report each child directory once, as "<k>/".
		sorted = append(sorted, fmt.Sprintf("%d/", k))
	}
	sort.Strings(sorted)

	got, err := fsm.List(ctx, "foo/")
	if err != nil {
		t.Fatal(err)
	}
	sort.Strings(got)
	if diff := deep.Equal(sorted, got); len(diff) > 0 {
		t.Fatal(diff)
	}
}