autopilot: assume nodes we haven't received heartbeats from are running the same version as we are (#17019)
OSS parts of ent PR #3172: assume nodes we haven't received heartbeats from are running the same version as we are. Failing to provide a version/upgrade_version will result in Autopilot (on ent) demoting those unversioned nodes to non-voters until we receive a heartbeat from them.
This commit is contained in:
parent
a034ebfd27
commit
5db952eada
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
storage/raft: Nodes no longer get demoted to non-voter if we don't know their version due to missing heartbeats.
|
||||
```
|
|
@ -126,7 +126,7 @@ func (l *InmemLayer) Dial(addr string, timeout time.Duration, tlsConfig *tls.Con
|
|||
}
|
||||
|
||||
if l.logger.IsDebug() {
|
||||
l.logger.Debug("dailing connection", "node", l.addr, "remote", addr, "alpn", alpn)
|
||||
l.logger.Debug("dialing connection", "node", l.addr, "remote", addr, "alpn", alpn)
|
||||
}
|
||||
|
||||
if connectionCh != nil {
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/hashicorp/vault/helper/testhelpers"
|
||||
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
|
||||
"github.com/hashicorp/vault/physical/raft"
|
||||
"github.com/hashicorp/vault/sdk/version"
|
||||
"github.com/hashicorp/vault/vault"
|
||||
"github.com/kr/pretty"
|
||||
testingintf "github.com/mitchellh/go-testing-interface"
|
||||
|
@ -412,3 +413,53 @@ func join(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster
|
|||
time.Sleep(1 * time.Second)
|
||||
cluster.UnsealCore(t, core)
|
||||
}
|
||||
|
||||
// TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just
|
||||
// because it hasn't been heard from in some time.
|
||||
func TestRaft_VotersStayVoters(t *testing.T) {
|
||||
cluster := raftCluster(t, &RaftClusterOpts{
|
||||
DisableFollowerJoins: true,
|
||||
InmemCluster: true,
|
||||
EnableAutopilot: true,
|
||||
PhysicalFactoryConfig: map[string]interface{}{
|
||||
"performance_multiplier": "5",
|
||||
},
|
||||
VersionMap: map[int]string{
|
||||
0: version.Version,
|
||||
1: version.Version,
|
||||
2: version.Version,
|
||||
},
|
||||
})
|
||||
defer cluster.Cleanup()
|
||||
testhelpers.WaitForActiveNode(t, cluster)
|
||||
|
||||
client := cluster.Cores[0].Client
|
||||
|
||||
config, err := client.Sys().RaftAutopilotConfiguration()
|
||||
require.NoError(t, err)
|
||||
joinAndStabilizeAndPromote(t, cluster.Cores[1], client, cluster, config, "core-1", 2)
|
||||
joinAndStabilizeAndPromote(t, cluster.Cores[2], client, cluster, config, "core-2", 3)
|
||||
|
||||
errIfNonVotersExist := func() error {
|
||||
t.Helper()
|
||||
resp, err := client.Sys().RaftAutopilotState()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for k, v := range resp.Servers {
|
||||
if v.Status == "non-voter" {
|
||||
return fmt.Errorf("node %q is a non-voter", k)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
testhelpers.RetryUntil(t, 10*time.Second, errIfNonVotersExist)
|
||||
|
||||
// Core0 is the leader, sealing it will both cause an election - and the
|
||||
// new leader won't have seen any heartbeats initially - and create a "down"
|
||||
// node that won't be sending heartbeats.
|
||||
testhelpers.EnsureCoreSealed(t, cluster.Cores[0])
|
||||
time.Sleep(30 * time.Second)
|
||||
client = cluster.Cores[1].Client
|
||||
errIfNonVotersExist()
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"github.com/hashicorp/vault/physical/raft"
|
||||
"github.com/hashicorp/vault/sdk/helper/jsonutil"
|
||||
"github.com/hashicorp/vault/sdk/logical"
|
||||
"github.com/hashicorp/vault/sdk/version"
|
||||
"github.com/hashicorp/vault/vault/seal"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"golang.org/x/net/http2"
|
||||
|
@ -353,6 +354,7 @@ func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raf
|
|||
AppliedIndex: 0,
|
||||
Term: 0,
|
||||
DesiredSuffrage: "voter",
|
||||
SDKVersion: version.GetVersion().Version,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue