autopilot: assume nodes we haven't received heartbeats from are running the same version as we are (#17019)
OSS parts of ent PR #3172: assume nodes we haven't received heartbeats from are running the same version as we are. Failing to provide a version/upgrade_version will result in Autopilot (on ent) demoting those unversioned nodes to non-voters until we receive a heartbeat from them.
This commit is contained in:
parent
a034ebfd27
commit
5db952eada
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
storage/raft: Nodes no longer get demoted to nonvoter if we don't know their version due to missing heartbeats.
|
||||||
|
```
|
|
@ -126,7 +126,7 @@ func (l *InmemLayer) Dial(addr string, timeout time.Duration, tlsConfig *tls.Con
|
||||||
}
|
}
|
||||||
|
|
||||||
if l.logger.IsDebug() {
|
if l.logger.IsDebug() {
|
||||||
l.logger.Debug("dailing connection", "node", l.addr, "remote", addr, "alpn", alpn)
|
l.logger.Debug("dialing connection", "node", l.addr, "remote", addr, "alpn", alpn)
|
||||||
}
|
}
|
||||||
|
|
||||||
if connectionCh != nil {
|
if connectionCh != nil {
|
||||||
|
|
|
@ -16,6 +16,7 @@ import (
|
||||||
"github.com/hashicorp/vault/helper/testhelpers"
|
"github.com/hashicorp/vault/helper/testhelpers"
|
||||||
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
|
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
|
||||||
"github.com/hashicorp/vault/physical/raft"
|
"github.com/hashicorp/vault/physical/raft"
|
||||||
|
"github.com/hashicorp/vault/sdk/version"
|
||||||
"github.com/hashicorp/vault/vault"
|
"github.com/hashicorp/vault/vault"
|
||||||
"github.com/kr/pretty"
|
"github.com/kr/pretty"
|
||||||
testingintf "github.com/mitchellh/go-testing-interface"
|
testingintf "github.com/mitchellh/go-testing-interface"
|
||||||
|
@ -412,3 +413,53 @@ func join(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
cluster.UnsealCore(t, core)
|
cluster.UnsealCore(t, core)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just
|
||||||
|
// because it hasn't been heard from in some time.
|
||||||
|
func TestRaft_VotersStayVoters(t *testing.T) {
|
||||||
|
cluster := raftCluster(t, &RaftClusterOpts{
|
||||||
|
DisableFollowerJoins: true,
|
||||||
|
InmemCluster: true,
|
||||||
|
EnableAutopilot: true,
|
||||||
|
PhysicalFactoryConfig: map[string]interface{}{
|
||||||
|
"performance_multiplier": "5",
|
||||||
|
},
|
||||||
|
VersionMap: map[int]string{
|
||||||
|
0: version.Version,
|
||||||
|
1: version.Version,
|
||||||
|
2: version.Version,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
defer cluster.Cleanup()
|
||||||
|
testhelpers.WaitForActiveNode(t, cluster)
|
||||||
|
|
||||||
|
client := cluster.Cores[0].Client
|
||||||
|
|
||||||
|
config, err := client.Sys().RaftAutopilotConfiguration()
|
||||||
|
require.NoError(t, err)
|
||||||
|
joinAndStabilizeAndPromote(t, cluster.Cores[1], client, cluster, config, "core-1", 2)
|
||||||
|
joinAndStabilizeAndPromote(t, cluster.Cores[2], client, cluster, config, "core-2", 3)
|
||||||
|
|
||||||
|
errIfNonVotersExist := func() error {
|
||||||
|
t.Helper()
|
||||||
|
resp, err := client.Sys().RaftAutopilotState()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
for k, v := range resp.Servers {
|
||||||
|
if v.Status == "non-voter" {
|
||||||
|
return fmt.Errorf("node %q is a non-voter", k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
testhelpers.RetryUntil(t, 10*time.Second, errIfNonVotersExist)
|
||||||
|
|
||||||
|
// Core0 is the leader, sealing it will both cause an election - and the
|
||||||
|
// new leader won't have seen any heartbeats initially - and create a "down"
|
||||||
|
// node that won't be sending heartbeats.
|
||||||
|
testhelpers.EnsureCoreSealed(t, cluster.Cores[0])
|
||||||
|
time.Sleep(30 * time.Second)
|
||||||
|
client = cluster.Cores[1].Client
|
||||||
|
errIfNonVotersExist()
|
||||||
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import (
|
||||||
"github.com/hashicorp/vault/physical/raft"
|
"github.com/hashicorp/vault/physical/raft"
|
||||||
"github.com/hashicorp/vault/sdk/helper/jsonutil"
|
"github.com/hashicorp/vault/sdk/helper/jsonutil"
|
||||||
"github.com/hashicorp/vault/sdk/logical"
|
"github.com/hashicorp/vault/sdk/logical"
|
||||||
|
"github.com/hashicorp/vault/sdk/version"
|
||||||
"github.com/hashicorp/vault/vault/seal"
|
"github.com/hashicorp/vault/vault/seal"
|
||||||
"github.com/mitchellh/mapstructure"
|
"github.com/mitchellh/mapstructure"
|
||||||
"golang.org/x/net/http2"
|
"golang.org/x/net/http2"
|
||||||
|
@ -353,6 +354,7 @@ func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raf
|
||||||
AppliedIndex: 0,
|
AppliedIndex: 0,
|
||||||
Term: 0,
|
Term: 0,
|
||||||
DesiredSuffrage: "voter",
|
DesiredSuffrage: "voter",
|
||||||
|
SDKVersion: version.GetVersion().Version,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue