When renaming a node, ensure the name is not taken by another node.

Since DNS is case insensitive and DB as issues when similar names with different
cases are added, check for unicity based on case insensitivity.

Following another big incident we had in our cluster, we also validate
that adding/renaming a not does not conflicts with case insensitive
matches.

We had the following error once:

 - one node called: mymachine.MYDC.mydomain was shut off
 - another node (different ID) was added with name: mymachine.mydc.mydomain before
   72 hours

When restarting the consul server of domain, the consul server restarted failed
to start since it detected an issue in RAFT database because
mymachine.MYDC.mydomain and mymachine.mydc.mydomain had the same names.

Checking at registration time with case insensitivity should definitly fix
those issues and avoid Consul DB corruption.
This commit is contained in:
Pierre Souchay 2018-07-11 14:42:54 +02:00
parent 1b55e3559b
commit 3d0a960470
2 changed files with 123 additions and 0 deletions

View File

@ -340,6 +340,21 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error {
return nil return nil
} }
func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node) error {
// Retrieve all of the nodes
enodes, err := tx.Get("nodes", "id")
if err != nil {
return fmt.Errorf("Cannot lookup all nodes: %s", err)
}
for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() {
enode := nodeIt.(*structs.Node)
if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID {
return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node)
}
}
return nil
}
// ensureNodeTxn is the inner function called to actually create a node // ensureNodeTxn is the inner function called to actually create a node
// registration or modify an existing one in the state store. It allows // registration or modify an existing one in the state store. It allows
// passing in a memdb transaction so it may be part of a larger txn. // passing in a memdb transaction so it may be part of a larger txn.
@ -355,6 +370,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err
if existing != nil { if existing != nil {
n = existing.(*structs.Node) n = existing.(*structs.Node)
if n.Node != node.Node { if n.Node != node.Node {
// Lets first get all nodes and check whether name do match
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node)
if dupNameError != nil {
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError)
}
// We are actually renaming a node, remove its reference first // We are actually renaming a node, remove its reference first
err := s.deleteNodeTxn(tx, idx, n.Node) err := s.deleteNodeTxn(tx, idx, n.Node)
if err != nil { if err != nil {
@ -363,6 +383,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err
} }
} }
} else {
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node)
if dupNameError != nil {
return fmt.Errorf("Error while adding Node ID: %q: %s", node.ID, dupNameError)
}
} }
} }

View File

@ -494,6 +494,104 @@ func TestStateStore_EnsureNode(t *testing.T) {
if idx != 6 { if idx != 6 {
t.Fatalf("bad index: %d", idx) t.Fatalf("bad index: %d", idx)
} }
newNodeID := types.NodeID("d0347693-65cc-4d9f-a6e0-5025b2e6513f")
// Adding another node with same name should fail
in = &structs.Node{
Node: "node1-renamed",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(8, in); err == nil {
t.Fatalf("err: %s", err)
}
// Adding another node with same name but different case should fail
in = &structs.Node{
Node: "Node1-RENAMED",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(8, in); err == nil {
t.Fatalf("err: %s", err)
}
// Lets add another valid node now
in = &structs.Node{
Node: "Node1bis",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(9, in); err != nil {
t.Fatalf("err: %s", err)
}
// Retrieve the node
idx, out, err = s.GetNode("Node1bis")
if out == nil {
t.Fatalf("Node should exist, but was null")
}
// Renaming should fail
in = &structs.Node{
Node: "Node1bis",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(9, in); err != nil {
t.Fatalf("err: %s", err)
}
idx, out, err = s.GetNode("Node1bis")
if err != nil {
t.Fatalf("err: %s", err)
}
// Node and indexes were updated
if out.ID != newNodeID || out.CreateIndex != 9 || out.ModifyIndex != 9 || out.Address != "1.1.1.7" || out.Node != "Node1bis" {
t.Fatalf("bad: %#v", out)
}
if idx != 9 {
t.Fatalf("bad index: %d", idx)
}
// Renaming to same value as first node should fail as well
// Adding another node with same name but different case should fail
in = &structs.Node{
Node: "node1-renamed",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(10, in); err == nil {
t.Fatalf("err: %s", err)
}
// It should fail also with different case
in = &structs.Node{
Node: "Node1-Renamed",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(10, in); err == nil {
t.Fatalf("err: %s", err)
}
// But should work if names are different
in = &structs.Node{
Node: "Node1-Renamed2",
ID: newNodeID,
Address: "1.1.1.7",
}
if err := s.EnsureNode(11, in); err != nil {
t.Fatalf("err: %s", err)
}
// Node renamed should be Ok
idx, out, err = s.GetNode("Node1-Renamed2")
if err != nil {
t.Fatalf("err: %s", err)
}
} }
func TestStateStore_GetNodes(t *testing.T) { func TestStateStore_GetNodes(t *testing.T) {