From 5db952eada0c93b5881717619331f56144a50512 Mon Sep 17 00:00:00 2001 From: Nick Cabatoff Date: Tue, 6 Sep 2022 14:49:04 -0400 Subject: [PATCH] autopilot: assume nodes we haven't received heartbeats from are running the same version as we are (#17019) OSS parts of ent PR #3172: assume nodes we haven't received heartbeats from are running the same version as we are. Failing to provide a version/upgrade_version will result in Autopilot (on ent) demoting those unversioned nodes to non-voters until we receive a heartbeat from them. --- changelog/17019.txt | 3 ++ vault/cluster/inmem_layer.go | 2 +- .../raft/raft_autopilot_test.go | 51 +++++++++++++++++++ vault/raft.go | 2 + 4 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 changelog/17019.txt diff --git a/changelog/17019.txt b/changelog/17019.txt new file mode 100644 index 000000000..63e2da492 --- /dev/null +++ b/changelog/17019.txt @@ -0,0 +1,3 @@ +```release-note:bug +storage/raft: Nodes no longer get demoted to nonvoter if we don't know their version due to missing heartbeats. 
+``` diff --git a/vault/cluster/inmem_layer.go b/vault/cluster/inmem_layer.go index ca4f7cbe8..c5819136a 100644 --- a/vault/cluster/inmem_layer.go +++ b/vault/cluster/inmem_layer.go @@ -126,7 +126,7 @@ func (l *InmemLayer) Dial(addr string, timeout time.Duration, tlsConfig *tls.Con } if l.logger.IsDebug() { - l.logger.Debug("dailing connection", "node", l.addr, "remote", addr, "alpn", alpn) + l.logger.Debug("dialing connection", "node", l.addr, "remote", addr, "alpn", alpn) } if connectionCh != nil { diff --git a/vault/external_tests/raft/raft_autopilot_test.go b/vault/external_tests/raft/raft_autopilot_test.go index 0996b987d..6e98029f0 100644 --- a/vault/external_tests/raft/raft_autopilot_test.go +++ b/vault/external_tests/raft/raft_autopilot_test.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/vault/helper/testhelpers" "github.com/hashicorp/vault/helper/testhelpers/teststorage" "github.com/hashicorp/vault/physical/raft" + "github.com/hashicorp/vault/sdk/version" "github.com/hashicorp/vault/vault" "github.com/kr/pretty" testingintf "github.com/mitchellh/go-testing-interface" @@ -412,3 +413,53 @@ func join(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster time.Sleep(1 * time.Second) cluster.UnsealCore(t, core) } + +// TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just +// because it hasn't been heard from in some time. 
+func TestRaft_VotersStayVoters(t *testing.T) {
+	cluster := raftCluster(t, &RaftClusterOpts{
+		DisableFollowerJoins: true,
+		InmemCluster:         true,
+		EnableAutopilot:      true,
+		PhysicalFactoryConfig: map[string]interface{}{
+			"performance_multiplier": "5",
+		},
+		VersionMap: map[int]string{
+			0: version.Version,
+			1: version.Version,
+			2: version.Version,
+		},
+	})
+	defer cluster.Cleanup()
+	testhelpers.WaitForActiveNode(t, cluster)
+
+	client := cluster.Cores[0].Client
+
+	config, err := client.Sys().RaftAutopilotConfiguration()
+	require.NoError(t, err)
+	joinAndStabilizeAndPromote(t, cluster.Cores[1], client, cluster, config, "core-1", 2)
+	joinAndStabilizeAndPromote(t, cluster.Cores[2], client, cluster, config, "core-2", 3)
+
+	errIfNonVotersExist := func() error {
+		t.Helper()
+		resp, err := client.Sys().RaftAutopilotState()
+		if err != nil {
+			t.Fatal(err)
+		}
+		for k, v := range resp.Servers {
+			if v.Status == "non-voter" {
+				return fmt.Errorf("node %q is a non-voter", k)
+			}
+		}
+		return nil
+	}
+	testhelpers.RetryUntil(t, 10*time.Second, errIfNonVotersExist)
+
+	// Core0 is the leader, so sealing it both forces an election (the new
+	// leader starts out having seen no heartbeats) and leaves behind a "down"
+	// node that sends none. Neither may demote a voter, hence the assertion.
+	testhelpers.EnsureCoreSealed(t, cluster.Cores[0])
+	time.Sleep(30 * time.Second)
+	client = cluster.Cores[1].Client
+	require.NoError(t, errIfNonVotersExist())
+}
diff --git a/vault/raft.go b/vault/raft.go
index 15bc01312..3d6b9c4c7 100644
--- a/vault/raft.go
+++ b/vault/raft.go
@@ -24,6 +24,7 @@ import (
 	"github.com/hashicorp/vault/physical/raft"
 	"github.com/hashicorp/vault/sdk/helper/jsonutil"
 	"github.com/hashicorp/vault/sdk/logical"
+	"github.com/hashicorp/vault/sdk/version"
 	"github.com/hashicorp/vault/vault/seal"
 	"github.com/mitchellh/mapstructure"
 	"golang.org/x/net/http2"
@@ -353,6 +354,7 @@ func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raf
 				AppliedIndex:    0,
 				Term:            0,
 				DesiredSuffrage: "voter",
+				SDKVersion:      version.GetVersion().Version,
 			})
 		}
 	}