Add logic to allow changing a failed node's ID
parent 5ab580990b
commit bf09061e86
@@ -1335,8 +1335,13 @@ AFTER_CHECK:
 		// If there's existing information about the node, do not
 		// clobber it.
-		SkipNodeUpdate: true,
+		//SkipNodeUpdate: true,
 	}
+	if node != nil {
+		req.TaggedAddresses = node.TaggedAddresses
+		req.NodeMeta = node.Meta
+	}
+
 	_, err = s.raftApply(structs.RegisterRequestType, &req)
 	return err
 }
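
Note on this hunk: with SkipNodeUpdate commented out, the reconcile register is now allowed to rewrite the node record itself, which is what lets a failed node's ID change; the cost is that a bare RegisterRequest would also wipe catalog-only fields, so the existing node's tagged addresses and metadata are copied onto the request first. A minimal sketch of the pattern, assuming the structs.RegisterRequest fields shown in the hunk (buildRegisterReq itself is a hypothetical helper, not part of the diff):

    // buildRegisterReq sketches how catalog-only fields survive a
    // re-registration once SkipNodeUpdate is no longer set.
    func buildRegisterReq(dc, name, addr string, node *structs.Node) structs.RegisterRequest {
    	req := structs.RegisterRequest{
    		Datacenter: dc,
    		Node:       name,
    		Address:    addr,
    	}
    	if node != nil {
    		// Without these two assignments, applying the register would
    		// reset TaggedAddresses and NodeMeta in the catalog.
    		req.TaggedAddresses = node.TaggedAddresses
    		req.NodeMeta = node.Meta
    	}
    	return req
    }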
@@ -953,6 +953,78 @@ func TestLeader_ChangeServerID(t *testing.T) {
 	})
 }
 
+func TestLeader_ChangeNodeID(t *testing.T) {
+	t.Parallel()
+	conf := func(c *Config) {
+		c.Bootstrap = false
+		c.BootstrapExpect = 3
+		c.Datacenter = "dc1"
+	}
+	dir1, s1 := testServerWithConfig(t, conf)
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+
+	dir2, s2 := testServerWithConfig(t, conf)
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+
+	dir3, s3 := testServerWithConfig(t, conf)
+	defer os.RemoveAll(dir3)
+	defer s3.Shutdown()
+
+	servers := []*Server{s1, s2, s3}
+
+	// Try to join and wait for all servers to get promoted
+	joinLAN(t, s2, s1)
+	joinLAN(t, s3, s1)
+	for _, s := range servers {
+		testrpc.WaitForTestAgent(t, s.RPC, "dc1")
+		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
+	}
+
+	// Shut down a server, freeing up its address/port
+	s3.Shutdown()
+
+	retry.Run(t, func(r *retry.R) {
+		alive := 0
+		for _, m := range s1.LANMembers() {
+			if m.Status == serf.StatusAlive {
+				alive++
+			}
+		}
+		if got, want := alive, 2; got != want {
+			r.Fatalf("got %d alive members want %d", got, want)
+		}
+	})
+
+	// Bring up a new server with s3's address that will get a different ID
+	dir4, s4 := testServerWithConfig(t, func(c *Config) {
+		c.Bootstrap = false
+		c.Datacenter = "dc1"
+		c.NodeName = s3.config.NodeName
+	})
+	defer os.RemoveAll(dir4)
+	defer s4.Shutdown()
+	joinLAN(t, s4, s1)
+	servers[2] = s4
+
+	// Make sure the dead server is removed and we're back to 3 total peers
+	retry.Run(t, func(r *retry.R) {
+		r.Check(wantRaft(servers))
+		for _, s := range servers {
+			r.Check(wantPeers(s, 3))
+		}
+	})
+
+	retry.Run(t, func(r *retry.R) {
+		for _, m := range s1.LANMembers() {
+			if m.Status != serf.StatusAlive {
+				r.Fatalf("bad status: %v", m)
+			}
+		}
+	})
+}
+
 func TestLeader_ACL_Initialization(t *testing.T) {
 	t.Parallel()
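
The test walks the full rename path: bootstrap a three-server cluster, shut down s3, wait until Serf reports only two alive members, then start s4 reusing s3's node name (it comes up with a fresh ID) and assert the cluster converges back to three healthy Raft peers. To run just this test, something like the following should work from the repository root (the package path is an assumption based on where Consul's leader tests usually live):

    go test -run TestLeader_ChangeNodeID ./agent/consul/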
@@ -369,7 +369,22 @@ func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node
 	for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() {
 		enode := nodeIt.(*structs.Node)
 		if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID {
-			if !(enode.ID == "" && allowClashWithoutID) {
+			// Look up the existing node's Serf health check to see if it's failed.
+			// If it is, the node can be renamed.
+			enodeCheck, err := tx.First("checks", "id", enode.Node, string(structs.SerfCheckID))
+			if err != nil {
+				return fmt.Errorf("Cannot get status of node %s: %s", enode.Node, err)
+			}
+			if enodeCheck == nil {
+				return fmt.Errorf("Cannot rename node %s: Serf health check not found for existing node", enode.Node)
+			}
+
+			enodeSerfCheck, ok := enodeCheck.(*structs.HealthCheck)
+			if !ok {
+				return fmt.Errorf("Existing node %q's Serf health check has type %T", enode.Node, enodeCheck)
+			}
+
+			if !((enode.ID == "" || enodeSerfCheck.Status == api.HealthCritical) && allowClashWithoutID) {
 				return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node)
 			}
 		}
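
The net effect of this hunk: a name clash was previously tolerated only when the existing node had no ID; now it is also tolerated when the existing node's Serf health check is critical, i.e. the node is failed. Pulled out as a standalone predicate for clarity (canRename is a hypothetical helper, not part of the diff; "critical" mirrors api.HealthCritical):

    // canRename reports whether a new node may claim an existing node's name.
    // serfStatus is the existing node's Serf health check status; "critical"
    // is the value of api.HealthCritical in Consul's api package.
    func canRename(existingID, serfStatus string, allowClashWithoutID bool) bool {
    	failed := serfStatus == "critical" // api.HealthCritical
    	return (existingID == "" || failed) && allowClashWithoutID
    }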