From eccb56ade0c856a7ce734ef8a38556aa3ac6db3a Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Mon, 26 Mar 2018 16:44:13 +0200 Subject: [PATCH 1/3] Added support for renaming nodes when their IP does not change --- agent/consul/state/catalog.go | 13 +++++++++++-- agent/consul/state/catalog_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index 38d59e810..819693484 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -355,9 +355,18 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err if existing != nil { n = existing.(*structs.Node) if n.Node != node.Node { - return fmt.Errorf("node ID %q for node %q aliases existing node %q", - node.ID, node.Node, n.Node) + if n.Address != node.Address { + return fmt.Errorf("node ID %q for node %q (%s) aliases existing node %q (%s)", + node.ID, node.Node, node.Address, n.Node, n.Address) + } + fmt.Printf("Node renaming: %s VS %s -- idx=%d\n", node.Node, n.Node, idx) + err := s.deleteNodeTxn(tx, idx, n.Node) + if err != nil { + return fmt.Errorf("Error while renaming Node ID: %q from %s to %s", + node.ID, n.Node, node.Node) + } } + } } diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index 39cd81d60..03e0f1f48 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -476,6 +476,35 @@ func TestStateStore_EnsureNode(t *testing.T) { if err == nil || !strings.Contains(err.Error(), "aliases existing node") { t.Fatalf("err: %v", err) } + + // Renaming a node should work as long as IP did not change + in = &structs.Node{ + Node: "node1-renamed", + ID: types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c"), + Address: "1.1.1.2", + } + if err := s.EnsureNode(6, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node + idx, out, err = s.GetNode("node1") + if out != nil { + t.Fatalf("Node should not exist anymore: %q", out) + } + + idx, out, err = s.GetNode("node1-renamed") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.CreateIndex != 1 || out.ModifyIndex != 6 || out.Address != "1.1.1.2" || out.Node != "node1-renamed" { + t.Fatalf("bad: %#v", out) + } + if idx != 6 { + t.Fatalf("bad index: %d", idx) + } } func TestStateStore_GetNodes(t *testing.T) { From 1b55e3559b21358e8247d5a422afaebee2ed30d2 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 18 Apr 2018 15:39:38 +0200 Subject: [PATCH 2/3] Allow renaming nodes when ID is unchanged --- agent/consul/state/catalog.go | 6 +----- agent/consul/state/catalog_test.go | 13 +------------ 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index 819693484..f01148adb 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -355,11 +355,7 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err if existing != nil { n = existing.(*structs.Node) if n.Node != node.Node { - if n.Address != node.Address { - return fmt.Errorf("node ID %q for node %q (%s) aliases existing node %q (%s)", - node.ID, node.Node, node.Address, n.Node, n.Address) - } - fmt.Printf("Node renaming: %s VS %s -- idx=%d\n", node.Node, n.Node, idx) + // We are actually renaming a node, remove its reference first err := s.deleteNodeTxn(tx, idx, n.Node) if err != nil { return fmt.Errorf("Error while renaming Node ID: %q from %s to %s", diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index 03e0f1f48..593fbcada 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -466,18 +466,7 @@ func TestStateStore_EnsureNode(t *testing.T) { t.Fatalf("err: %v", err) } - // Now try to add another node with the same ID - in = &structs.Node{ - Node: "nope", - ID: types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c"), - Address: "1.2.3.4", - } - err = s.EnsureNode(5, in) - if err == nil || !strings.Contains(err.Error(), "aliases existing node") { - t.Fatalf("err: %v", err) - } - - // Renaming a node should work as long as IP did not change + // Renaming a node should work if ID is the same in = &structs.Node{ Node: "node1-renamed", ID: types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c"), From 3d0a96047063d8d4c66c9f12e0ec22fa99a6fc47 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 11 Jul 2018 14:42:54 +0200 Subject: [PATCH 3/3] When renaming a node, ensure the name is not taken by another node. Since DNS is case insensitive and DB as issues when similar names with different cases are added, check for unicity based on case insensitivity. Following another big incident we had in our cluster, we also validate that adding/renaming a not does not conflicts with case insensitive matches. We had the following error once: - one node called: mymachine.MYDC.mydomain was shut off - another node (different ID) was added with name: mymachine.mydc.mydomain before 72 hours When restarting the consul server of domain, the consul server restarted failed to start since it detected an issue in RAFT database because mymachine.MYDC.mydomain and mymachine.mydc.mydomain had the same names. Checking at registration time with case insensitivity should definitly fix those issues and avoid Consul DB corruption. --- agent/consul/state/catalog.go | 25 ++++++++ agent/consul/state/catalog_test.go | 98 ++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index f01148adb..21179accd 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -340,6 +340,21 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error { return nil } +func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node) error { + // Retrieve all of the nodes + enodes, err := tx.Get("nodes", "id") + if err != nil { + return fmt.Errorf("Cannot lookup all nodes: %s", err) + } + for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() { + enode := nodeIt.(*structs.Node) + if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID { + return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node) + } + } + return nil +} + // ensureNodeTxn is the inner function called to actually create a node // registration or modify an existing one in the state store. It allows // passing in a memdb transaction so it may be part of a larger txn. @@ -355,6 +370,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err if existing != nil { n = existing.(*structs.Node) if n.Node != node.Node { + // Lets first get all nodes and check whether name do match + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node) + if dupNameError != nil { + return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) + } // We are actually renaming a node, remove its reference first err := s.deleteNodeTxn(tx, idx, n.Node) if err != nil { @@ -363,6 +383,11 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err } } + } else { + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node) + if dupNameError != nil { + return fmt.Errorf("Error while adding Node ID: %q: %s", node.ID, dupNameError) + } } } diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index 593fbcada..dc25167fb 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -494,6 +494,104 @@ func TestStateStore_EnsureNode(t *testing.T) { if idx != 6 { t.Fatalf("bad index: %d", idx) } + + newNodeID := types.NodeID("d0347693-65cc-4d9f-a6e0-5025b2e6513f") + + // Adding another node with same name should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("err: %s", err) + } + + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "Node1-RENAMED", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("err: %s", err) + } + + // Lets add another valid node now + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node + idx, out, err = s.GetNode("Node1bis") + if out == nil { + t.Fatalf("Node should exist, but was null") + } + + // Renaming should fail + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + idx, out, err = s.GetNode("Node1bis") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.ID != newNodeID || out.CreateIndex != 9 || out.ModifyIndex != 9 || out.Address != "1.1.1.7" || out.Node != "Node1bis" { + t.Fatalf("bad: %#v", out) + } + if idx != 9 { + t.Fatalf("bad index: %d", idx) + } + + // Renaming to same value as first node should fail as well + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // It should fail also with different case + in = &structs.Node{ + Node: "Node1-Renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // But should work if names are different + in = &structs.Node{ + Node: "Node1-Renamed2", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(11, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Node renamed should be Ok + idx, out, err = s.GetNode("Node1-Renamed2") + if err != nil { + t.Fatalf("err: %s", err) + } } func TestStateStore_GetNodes(t *testing.T) {