From 1b745aef58801902ce7ccd1cb578b8d189f5b25d Mon Sep 17 00:00:00 2001 From: Nick Cabatoff Date: Wed, 7 Dec 2022 14:17:45 -0500 Subject: [PATCH] Prevent autopilot from demoting voters when they join a 2nd time (#18263) --- changelog/18263.txt | 5 ++++ .../raft/raft_autopilot_test.go | 10 +++---- vault/external_tests/raft/raft_test.go | 26 +++++++++---------- vault/logical_system_raft.go | 4 +-- 4 files changed, 25 insertions(+), 20 deletions(-) create mode 100644 changelog/18263.txt diff --git a/changelog/18263.txt b/changelog/18263.txt new file mode 100644 index 000000000..1cdad85b1 --- /dev/null +++ b/changelog/18263.txt @@ -0,0 +1,5 @@ +```release-note:bug +storage/raft (enterprise): An already joined node can rejoin by wiping storage +and re-issueing a join request, but in doing so could transiently become a +non-voter. In some scenarios this resulted in loss of quorum. +``` diff --git a/vault/external_tests/raft/raft_autopilot_test.go b/vault/external_tests/raft/raft_autopilot_test.go index 731393ca9..7d7b9822e 100644 --- a/vault/external_tests/raft/raft_autopilot_test.go +++ b/vault/external_tests/raft/raft_autopilot_test.go @@ -24,7 +24,7 @@ import ( ) func TestRaft_Autopilot_Disable(t *testing.T) { - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ DisableFollowerJoins: true, InmemCluster: true, // Not setting EnableAutopilot here. @@ -38,7 +38,7 @@ func TestRaft_Autopilot_Disable(t *testing.T) { } func TestRaft_Autopilot_Stabilization_And_State(t *testing.T) { - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ DisableFollowerJoins: true, InmemCluster: true, EnableAutopilot: true, @@ -109,7 +109,7 @@ func TestRaft_Autopilot_Stabilization_And_State(t *testing.T) { } func TestRaft_Autopilot_Configuration(t *testing.T) { - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ DisableFollowerJoins: true, InmemCluster: true, EnableAutopilot: true, @@ -301,7 +301,7 @@ func TestRaft_Autopilot_Stabilization_Delay(t *testing.T) { } func TestRaft_AutoPilot_Peersets_Equivalent(t *testing.T) { - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ InmemCluster: true, EnableAutopilot: true, DisableFollowerJoins: true, @@ -417,7 +417,7 @@ func join(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster // TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just // because it hasn't been heard from in some time. func TestRaft_VotersStayVoters(t *testing.T) { - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ DisableFollowerJoins: true, InmemCluster: true, EnableAutopilot: true, diff --git a/vault/external_tests/raft/raft_test.go b/vault/external_tests/raft/raft_test.go index 368580ab8..0f89377f4 100644 --- a/vault/external_tests/raft/raft_test.go +++ b/vault/external_tests/raft/raft_test.go @@ -48,7 +48,7 @@ type RaftClusterOpts struct { EffectiveSDKVersionMap map[int]string } -func raftCluster(t testing.TB, ropts *RaftClusterOpts) *vault.TestCluster { +func raftCluster(t testing.TB, ropts *RaftClusterOpts) (*vault.TestCluster, *vault.TestClusterOptions) { if ropts == nil { ropts = &RaftClusterOpts{} } @@ -82,7 +82,7 @@ func raftCluster(t testing.TB, ropts *RaftClusterOpts) *vault.TestCluster { cluster := vault.NewTestCluster(benchhelpers.TBtoT(t), conf, &opts) cluster.Start() vault.TestWaitActive(benchhelpers.TBtoT(t), cluster.Cores[0].Core) - return cluster + return cluster, &opts } func TestRaft_BoltDBMetrics(t *testing.T) { @@ -322,7 +322,7 @@ func TestRaft_Join(t *testing.T) { func TestRaft_RemovePeer(t *testing.T) { t.Parallel() - cluster := raftCluster(t, nil) + cluster, _ := raftCluster(t, nil) defer cluster.Cleanup() for i, c := range cluster.Cores { @@ -395,7 +395,7 @@ func TestRaft_NodeIDHeader(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - cluster := raftCluster(t, tc.ropts) + cluster, _ := raftCluster(t, tc.ropts) defer cluster.Cleanup() for i, c := range cluster.Cores { @@ -432,7 +432,7 @@ func TestRaft_NodeIDHeader(t *testing.T) { func TestRaft_Configuration(t *testing.T) { t.Parallel() - cluster := raftCluster(t, nil) + cluster, _ := raftCluster(t, nil) defer cluster.Cleanup() for i, c := range cluster.Cores { @@ -479,7 +479,7 @@ func TestRaft_Configuration(t *testing.T) { func TestRaft_ShamirUnseal(t *testing.T) { t.Parallel() - cluster := raftCluster(t, nil) + cluster, _ := raftCluster(t, nil) defer cluster.Cleanup() for i, c := range cluster.Cores { @@ -491,7 +491,7 @@ func TestRaft_ShamirUnseal(t *testing.T) { func TestRaft_SnapshotAPI(t *testing.T) { t.Parallel() - cluster := raftCluster(t, nil) + cluster, _ := raftCluster(t, nil) defer cluster.Cleanup() leaderClient := cluster.Cores[0].Client @@ -555,7 +555,7 @@ func TestRaft_SnapshotAPI_MidstreamFailure(t *testing.T) { if err != nil { t.Fatal(err) } - cluster := raftCluster(t, &RaftClusterOpts{ + cluster, _ := raftCluster(t, &RaftClusterOpts{ NumCores: 1, Seal: autoSeal, }) @@ -660,7 +660,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) { tCaseLocal := tCase t.Parallel() - cluster := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby}) + cluster, _ := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby}) defer cluster.Cleanup() leaderClient := cluster.Cores[0].Client @@ -861,7 +861,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) { tCaseLocal := tCase t.Parallel() - cluster := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby}) + cluster, _ := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby}) defer cluster.Cleanup() leaderClient := cluster.Cores[0].Client @@ -1048,7 +1048,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) { func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) { t.Parallel() - cluster := raftCluster(t, nil) + cluster, _ := raftCluster(t, nil) defer cluster.Cleanup() leaderClient := cluster.Cores[0].Client @@ -1094,7 +1094,7 @@ func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) { // Cluster 2 { - cluster2 := raftCluster(t, nil) + cluster2, _ := raftCluster(t, nil) defer cluster2.Cleanup() leaderClient := cluster2.Cores[0].Client @@ -1141,7 +1141,7 @@ func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) { } func BenchmarkRaft_SingleNode(b *testing.B) { - cluster := raftCluster(b, nil) + cluster, _ := raftCluster(b, nil) defer cluster.Cleanup() leaderClient := cluster.Cores[0].Client diff --git a/vault/logical_system_raft.go b/vault/logical_system_raft.go index e44a1c472..b31c5535e 100644 --- a/vault/logical_system_raft.go +++ b/vault/logical_system_raft.go @@ -361,9 +361,9 @@ func (b *SystemBackend) handleRaftBootstrapAnswerWrite() framework.OperationFunc var desiredSuffrage string switch nonVoter { case true: - desiredSuffrage = "voter" - default: desiredSuffrage = "non-voter" + default: + desiredSuffrage = "voter" } if b.Core.raftFollowerStates != nil {