From 3d0a96047063d8d4c66c9f12e0ec22fa99a6fc47 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 11 Jul 2018 14:42:54 +0200 Subject: [PATCH] When renaming a node, ensure the name is not taken by another node. Since DNS is case insensitive and DB as issues when similar names with different cases are added, check for unicity based on case insensitivity. Following another big incident we had in our cluster, we also validate that adding/renaming a not does not conflicts with case insensitive matches. We had the following error once: - one node called: mymachine.MYDC.mydomain was shut off - another node (different ID) was added with name: mymachine.mydc.mydomain before 72 hours When restarting the consul server of domain, the consul server restarted failed to start since it detected an issue in RAFT database because mymachine.MYDC.mydomain and mymachine.mydc.mydomain had the same names. Checking at registration time with case insensitivity should definitly fix those issues and avoid Consul DB corruption. --- agent/consul/state/catalog.go | 25 ++++++++ agent/consul/state/catalog_test.go | 98 ++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index f01148adb..21179accd 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -340,6 +340,21 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error { return nil } +func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node) error { + // Retrieve all of the nodes + enodes, err := tx.Get("nodes", "id") + if err != nil { + return fmt.Errorf("Cannot lookup all nodes: %s", err) + } + for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() { + enode := nodeIt.(*structs.Node) + if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID { + return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node) + } + } + return nil +} + // ensureNodeTxn is the inner function called to actually create a node // registration or modify an existing one in the state store. It allows // passing in a memdb transaction so it may be part of a larger txn. @@ -355,6 +370,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err if existing != nil { n = existing.(*structs.Node) if n.Node != node.Node { + // Lets first get all nodes and check whether name do match + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node) + if dupNameError != nil { + return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) + } // We are actually renaming a node, remove its reference first err := s.deleteNodeTxn(tx, idx, n.Node) if err != nil { @@ -363,6 +383,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err } } + } else { + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node) + if dupNameError != nil { + return fmt.Errorf("Error while adding Node ID: %q: %s", node.ID, dupNameError) + } } } diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index 593fbcada..dc25167fb 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -494,6 +494,104 @@ func TestStateStore_EnsureNode(t *testing.T) { if idx != 6 { t.Fatalf("bad index: %d", idx) } + + newNodeID := types.NodeID("d0347693-65cc-4d9f-a6e0-5025b2e6513f") + + // Adding another node with same name should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("err: %s", err) + } + + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "Node1-RENAMED", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("err: %s", err) + } + + // Lets add another valid node now + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node + idx, out, err = s.GetNode("Node1bis") + if out == nil { + t.Fatalf("Node should exist, but was null") + } + + // Renaming should fail + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + idx, out, err = s.GetNode("Node1bis") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.ID != newNodeID || out.CreateIndex != 9 || out.ModifyIndex != 9 || out.Address != "1.1.1.7" || out.Node != "Node1bis" { + t.Fatalf("bad: %#v", out) + } + if idx != 9 { + t.Fatalf("bad index: %d", idx) + } + + // Renaming to same value as first node should fail as well + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // It should fail also with different case + in = &structs.Node{ + Node: "Node1-Renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // But should work if names are different + in = &structs.Node{ + Node: "Node1-Renamed2", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(11, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Node renamed should be Ok + idx, out, err = s.GetNode("Node1-Renamed2") + if err != nil { + t.Fatalf("err: %s", err) + } } func TestStateStore_GetNodes(t *testing.T) {