From 821a91ca31b3071d23b46ca733478b3f30e8443d Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 10 Aug 2018 17:30:45 +0200 Subject: [PATCH] Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415) * Allow to rename nodes with IDs, will fix #3974 and #4413 This change allow to rename any well behaving recent agent with an ID to be renamed safely, ie: without taking the name of another one with case insensitive comparison. Deprecated behaviour warning ---------------------------- Due to asceding compatibility, it is still possible however to "take" the name of another name by not providing any ID. Note that when not providing any ID, it is possible to have 2 nodes having similar names with case differences, ie: myNode and mynode which might lead to DB corruption on Consul server side and lead to server not properly restarting. See #3983 and #4399 for Context about this change. Disabling registration of nodes without IDs as specified in #4414 should probably be the way to go eventually. * Removed the case-insensitive search when adding a node within the else block since it breaks the test TestAgentAntiEntropy_Services While the else case is probably legit, it will be fixed with #4414 in a later release. * Added again the test in the else to avoid duplicated names, but enforce this test only for nodes having IDs. Thus most tests without any ID will work, and allows us fixing * Added more tests regarding request with/without IDs. `TestStateStore_EnsureNode` now test registration and renaming with IDs `TestStateStore_EnsureNodeDeprecated` tests registration without IDs and tests removing an ID from a node as well as updated a node without its ID (deprecated behaviour kept for backwards compatibility) * Do not allow renaming in case of conflict, including when other node has no ID * Fixed function GetNodeID that was not working due to wrong type when searching node from its ID Thus, all tests about renaming were not working properly. Added the full test cas that allowed me to detect it. * Better error messages, more tests when nodeID is not a valid UUID in GetNodeID() * Added separate TestStateStore_GetNodeID to test GetNodeID. More complete test coverage for GetNodeID * Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn` Also fixed comments to be clearer after remarks from @banks * Fixed error message in unit test to match test case * Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler --- agent/consul/state/catalog.go | 77 ++++- agent/consul/state/catalog_test.go | 450 ++++++++++++++++++++++++++++- 2 files changed, 508 insertions(+), 19 deletions(-) diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index e7d5ba3c0..18175d22a 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/types" "github.com/hashicorp/go-memdb" + uuid "github.com/hashicorp/go-uuid" ) const ( @@ -350,6 +351,25 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error { return nil } +// ensureNoNodeWithSimilarNameTxn checks that no other node has conflict in its name +// If allowClashWithoutID then, getting a conflict on another node without ID will be allowed +func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node, allowClashWithoutID bool) error { + // Retrieve all of the nodes + enodes, err := tx.Get("nodes", "id") + if err != nil { + return fmt.Errorf("Cannot lookup all nodes: %s", err) + } + for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() { + enode := nodeIt.(*structs.Node) + if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID { + if !(enode.ID == "" && allowClashWithoutID) { + return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node) + } + } + } + return nil +} + // ensureNodeTxn is the inner function called to actually create a node // registration or modify an existing one in the state store. It allows // passing in a memdb transaction so it may be part of a larger txn. @@ -358,18 +378,36 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err // name is the same. var n *structs.Node if node.ID != "" { - existing, err := tx.First("nodes", "uuid", string(node.ID)) + existing, err := getNodeIDTxn(tx, node.ID) if err != nil { return fmt.Errorf("node lookup failed: %s", err) } if existing != nil { - n = existing.(*structs.Node) + n = existing if n.Node != node.Node { - return fmt.Errorf("node ID %q for node %q aliases existing node %q", - node.ID, node.Node, n.Node) + // Lets first get all nodes and check whether name do match, we do not allow clash on nodes without ID + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, false) + if dupNameError != nil { + return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) + } + // We are actually renaming a node, remove its reference first + err := s.deleteNodeTxn(tx, idx, n.Node) + if err != nil { + return fmt.Errorf("Error while renaming Node ID: %q from %s to %s", + node.ID, n.Node, node.Node) + } + } + } else { + // We allow to "steal" another node name that would have no ID + // It basically means that we allow upgrading a node without ID and add the ID + dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, true) + if dupNameError != nil { + return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) } } } + // TODO: else Node.ID == "" should be forbidden in future Consul releases + // See https://github.com/hashicorp/consul/pull/3983 for context // Check for an existing node by name to support nodes with no IDs. if n == nil { @@ -377,9 +415,13 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err if err != nil { return fmt.Errorf("node name lookup failed: %s", err) } + if existing != nil { n = existing.(*structs.Node) } + // WARNING, for compatibility reasons with tests, we do not check + // for case insensitive matches, which may lead to DB corruption + // See https://github.com/hashicorp/consul/pull/3983 for context } // Get the indexes. @@ -421,6 +463,23 @@ func (s *Store) GetNode(id string) (uint64, *structs.Node, error) { return idx, nil, nil } +func getNodeIDTxn(tx *memdb.Txn, id types.NodeID) (*structs.Node, error) { + strnode := string(id) + uuidValue, err := uuid.ParseUUID(strnode) + if err != nil { + return nil, fmt.Errorf("node lookup by ID failed, wrong UUID: %v for '%s'", err, strnode) + } + + node, err := tx.First("nodes", "uuid", uuidValue) + if err != nil { + return nil, fmt.Errorf("node lookup by ID failed: %s", err) + } + if node != nil { + return node.(*structs.Node), nil + } + return nil, nil +} + // GetNodeID is used to retrieve a node registration by node ID. func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) { tx := s.db.Txn(false) @@ -430,14 +489,8 @@ func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) { idx := maxIndexTxn(tx, "nodes") // Retrieve the node from the state store - node, err := tx.First("nodes", "uuid", string(id)) - if err != nil { - return 0, nil, fmt.Errorf("node lookup failed: %s", err) - } - if node != nil { - return idx, node.(*structs.Node), nil - } - return idx, nil, nil + node, err := getNodeIDTxn(tx, id) + return idx, node, err } // Nodes is used to return all of the known nodes. diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index 17a411aab..bc2f79fdc 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -25,7 +25,103 @@ func makeRandomNodeID(t *testing.T) types.NodeID { return types.NodeID(id) } +func TestStateStore_GetNodeID(t *testing.T) { + s := testStateStore(t) + _, out, err := s.GetNodeID(types.NodeID("wrongId")) + if err == nil || out != nil || !strings.Contains(err.Error(), "node lookup by ID failed, wrong UUID") { + t.Fatalf("want an error, nil value, err:=%q ; out:=%q", err.Error(), out) + } + _, out, err = s.GetNodeID(types.NodeID("0123456789abcdefghijklmnopqrstuvwxyz")) + if err == nil || out != nil || !strings.Contains(err.Error(), "node lookup by ID failed, wrong UUID") { + t.Fatalf("want an error, nil value, err:=%q ; out:=%q", err, out) + } + + _, out, err = s.GetNodeID(types.NodeID("00a916bc-a357-4a19-b886-59419fcee50Z")) + if err == nil || out != nil || !strings.Contains(err.Error(), "node lookup by ID failed, wrong UUID") { + t.Fatalf("want an error, nil value, err:=%q ; out:=%q", err, out) + } + + _, out, err = s.GetNodeID(types.NodeID("00a916bc-a357-4a19-b886-59419fcee506")) + if err != nil || out != nil { + t.Fatalf("do not want any error nor returned value, err:=%q ; out:=%q", err, out) + } + + nodeID := types.NodeID("00a916bc-a357-4a19-b886-59419fceeaaa") + req := &structs.RegisterRequest{ + ID: nodeID, + Node: "node1", + Address: "1.2.3.4", + } + if err := s.EnsureRegistration(1, req); err != nil { + t.Fatalf("err: %s", err) + } + + _, out, err = s.GetNodeID(nodeID) + if err != nil { + t.Fatalf("got err %s want nil", err) + } + if out == nil || out.ID != nodeID { + t.Fatalf("out should not be nil and contain nodeId, but was:=%#q", out) + } + // Case insensitive lookup should work as well + _, out, err = s.GetNodeID(types.NodeID("00a916bC-a357-4a19-b886-59419fceeAAA")) + if err != nil { + t.Fatalf("got err %s want nil", err) + } + if out == nil || out.ID != nodeID { + t.Fatalf("out should not be nil and contain nodeId, but was:=%#q", out) + } +} + +func TestStateStore_ensureNoNodeWithSimilarNameTxn(t *testing.T) { + t.Parallel() + s := testStateStore(t) + nodeID := makeRandomNodeID(t) + req := &structs.RegisterRequest{ + ID: nodeID, + Node: "node1", + Address: "1.2.3.4", + TaggedAddresses: map[string]string{"hello": "world"}, + NodeMeta: map[string]string{"somekey": "somevalue"}, + } + if err := s.EnsureRegistration(1, req); err != nil { + t.Fatalf("err: %s", err) + } + req = &structs.RegisterRequest{ + ID: types.NodeID(""), + Node: "node2", + Address: "10.0.0.1", + } + if err := s.EnsureRegistration(2, req); err != nil { + t.Fatalf("err: %s", err) + } + tx := s.db.Txn(true) + defer tx.Abort() + node := &structs.Node{ + ID: makeRandomNodeID(t), + Node: "NOdE1", // Name is similar but case is different + Address: "2.3.4.5", + } + // Lets conflict with node1 (has an ID) + if err := s.ensureNoNodeWithSimilarNameTxn(tx, node, false); err == nil { + t.Fatalf("Should return an error since another name with similar name exists") + } + if err := s.ensureNoNodeWithSimilarNameTxn(tx, node, true); err == nil { + t.Fatalf("Should return an error since another name with similar name exists") + } + // Lets conflict with node without ID + node.Node = "NoDe2" + if err := s.ensureNoNodeWithSimilarNameTxn(tx, node, false); err == nil { + t.Fatalf("Should return an error since another name with similar name exists") + } + if err := s.ensureNoNodeWithSimilarNameTxn(tx, node, true); err != nil { + t.Fatalf("Should not clash with another similar node name without ID, err:=%q", err) + } + +} + func TestStateStore_EnsureRegistration(t *testing.T) { + t.Parallel() s := testStateStore(t) // Start with just a node. @@ -64,6 +160,9 @@ func TestStateStore_EnsureRegistration(t *testing.T) { if err != nil { t.Fatalf("got err %s want nil", err) } + if out2 == nil { + t.Fatalf("out2 should not be nil") + } if got, want := out, out2; !verify.Values(t, "GetNodeID", got, want) { t.FailNow() } @@ -401,6 +500,189 @@ func TestStateStore_EnsureRegistration_Restore(t *testing.T) { }() } +func deprecatedEnsureNodeWithoutIDCanRegister(t *testing.T, s *Store, nodeName string, txIdx uint64) { + // All the following is deprecated, and should be removed in future Consul versions + in := &structs.Node{ + Node: nodeName, + Address: "1.1.1.9", + } + if err := s.EnsureNode(txIdx, in); err != nil { + t.Fatalf("err: %s", err) + } + idx, out, err := s.GetNode(nodeName) + if err != nil { + t.Fatalf("err: %s", err) + } + if idx != txIdx { + t.Fatalf("index should be %q, was: %q", txIdx, idx) + } + if out.Node != nodeName { + t.Fatalf("unexpected result out = %q, nodeName supposed to be %s", out, nodeName) + } +} + +func TestStateStore_EnsureNodeDeprecated(t *testing.T) { + s := testStateStore(t) + + firstNodeName := "node-without-id" + deprecatedEnsureNodeWithoutIDCanRegister(t, s, firstNodeName, 1) + + newNodeID := types.NodeID("00a916bc-a357-4a19-b886-59419fcee50c") + // With this request, we basically add a node ID to existing node + // and change its address + in := &structs.Node{ + ID: newNodeID, + Node: firstNodeName, + Address: "1.1.7.8", + } + if err := s.EnsureNode(4, in); err != nil { + t.Fatalf("err: %v", err) + } + // Retrieve the node again + idx, out, err := s.GetNode(firstNodeName) + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node has updated information + if idx != 4 || out.Node != firstNodeName || out.ID != newNodeID || out.Address != "1.1.7.8" { + t.Fatalf("[DEPRECATED] bad node returned: %#v", out) + } + if out.CreateIndex != 1 || out.ModifyIndex != 4 { + t.Fatalf("[DEPRECATED] bad CreateIndex/ModifyIndex returned: %#v", out) + } + + // Now, lets update IP Address without providing any ID + // Only name of node will be used to match + in = &structs.Node{ + Node: firstNodeName, + Address: "1.1.7.10", + } + if err := s.EnsureNode(7, in); err != nil { + t.Fatalf("err: %v", err) + } + // Retrieve the node again + idx, out, err = s.GetNode(firstNodeName) + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node has updated information, its ID has been removed (deprecated, but working) + if idx != 7 || out.Node != firstNodeName || out.ID != "" || out.Address != "1.1.7.10" { + t.Fatalf("[DEPRECATED] bad node returned: %#v", out) + } + if out.CreateIndex != 1 || out.ModifyIndex != 7 { + t.Fatalf("[DEPRECATED] bad CreateIndex/ModifyIndex returned: %#v", out) + } +} + +func TestNodeRenamingNodes(t *testing.T) { + s := testStateStore(t) + + nodeID1 := types.NodeID("b789bf0a-d96b-4f70-a4a6-ac5dfaece53d") + nodeID2 := types.NodeID("27bee224-a4d7-45d0-9b8e-65b3c94a61ba") + + // Node1 with ID + in1 := &structs.Node{ + ID: nodeID1, + Node: "node1", + Address: "1.1.1.1", + } + + if err := s.EnsureNode(1, in1); err != nil { + t.Fatalf("err: %s", err) + } + + // Node2 with ID + in2 := &structs.Node{ + ID: nodeID2, + Node: "node2", + Address: "1.1.1.2", + } + + if err := s.EnsureNode(2, in2); err != nil { + t.Fatalf("err: %s", err) + } + + // Node3 without ID + in3 := &structs.Node{ + Node: "node3", + Address: "1.1.1.3", + } + + if err := s.EnsureNode(3, in3); err != nil { + t.Fatalf("err: %s", err) + } + + if _, node, err := s.GetNodeID(nodeID1); err != nil || node == nil || node.ID != nodeID1 { + t.Fatalf("err: %s, node:= %q", err, node) + } + + if _, node, err := s.GetNodeID(nodeID2); err != nil && node == nil || node.ID != nodeID2 { + t.Fatalf("err: %s", err) + } + + // Renaming node2 into node1 should fail + in2Modify := &structs.Node{ + ID: nodeID2, + Node: "node1", + Address: "1.1.1.2", + } + if err := s.EnsureNode(4, in2Modify); err == nil { + t.Fatalf("Renaming node2 into node1 should fail") + } + + // Conflict with case insensitive matching as well + in2Modify = &structs.Node{ + ID: nodeID2, + Node: "NoDe1", + Address: "1.1.1.2", + } + if err := s.EnsureNode(5, in2Modify); err == nil { + t.Fatalf("Renaming node2 into node1 should fail") + } + + // Conflict with case insensitive on node without ID + in2Modify = &structs.Node{ + ID: nodeID2, + Node: "NoDe3", + Address: "1.1.1.2", + } + if err := s.EnsureNode(6, in2Modify); err == nil { + t.Fatalf("Renaming node2 into node1 should fail") + } + + // No conflict, should work + in2Modify = &structs.Node{ + ID: nodeID2, + Node: "node2bis", + Address: "1.1.1.2", + } + if err := s.EnsureNode(6, in2Modify); err != nil { + t.Fatalf("Renaming node2 into node1 should fail") + } + + // Retrieve the node again + idx, out, err := s.GetNode("node2bis") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node again + idx2, out2, err := s.GetNodeID(nodeID2) + if err != nil { + t.Fatalf("err: %s", err) + } + + if idx != idx2 { + t.Fatalf("node should be the same") + } + + if out.ID != out2.ID || out.Node != out2.Node { + t.Fatalf("all should match") + } +} + func TestStateStore_EnsureNode(t *testing.T) { s := testStateStore(t) @@ -411,6 +693,7 @@ func TestStateStore_EnsureNode(t *testing.T) { // Create a node registration request in := &structs.Node{ + ID: types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c"), Node: "node1", Address: "1.1.1.1", } @@ -474,21 +757,174 @@ func TestStateStore_EnsureNode(t *testing.T) { t.Fatalf("bad index: %d", idx) } - // Add an ID to the node - in.ID = types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c") + // Update index to 4, no change if err := s.EnsureNode(4, in); err != nil { t.Fatalf("err: %v", err) } // Now try to add another node with the same ID in = &structs.Node{ - Node: "nope", + Node: "node1-renamed", ID: types.NodeID("cda916bc-a357-4a19-b886-59419fcee50c"), - Address: "1.2.3.4", + Address: "1.1.1.2", } - err = s.EnsureNode(5, in) - if err == nil || !strings.Contains(err.Error(), "aliases existing node") { - t.Fatalf("err: %v", err) + if err := s.EnsureNode(6, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node + idx, out, err = s.GetNode("node1") + if out != nil { + t.Fatalf("Node should not exist anymore: %q", out) + } + + idx, out, err = s.GetNode("node1-renamed") + if err != nil { + t.Fatalf("err: %s", err) + } + + if out == nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.CreateIndex != 1 || out.ModifyIndex != 6 || out.Address != "1.1.1.2" || out.Node != "node1-renamed" { + t.Fatalf("bad: %#v", out) + } + if idx != 6 { + t.Fatalf("bad index: %d", idx) + } + + newNodeID := types.NodeID("d0347693-65cc-4d9f-a6e0-5025b2e6513f") + + // Adding another node with same name should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("There should be an error since node1-renamed already exists") + } + + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "Node1-RENAMED", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(8, in); err == nil { + t.Fatalf("err: %s", err) + } + + // Lets add another valid node now + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + // Retrieve the node + idx, out, err = s.GetNode("Node1bis") + if out == nil { + t.Fatalf("Node should exist, but was null") + } + + // Renaming should fail + in = &structs.Node{ + Node: "Node1bis", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(9, in); err != nil { + t.Fatalf("err: %s", err) + } + + idx, out, err = s.GetNode("Node1bis") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.ID != newNodeID || out.CreateIndex != 9 || out.ModifyIndex != 9 || out.Address != "1.1.1.7" || out.Node != "Node1bis" { + t.Fatalf("bad: %#v", out) + } + if idx != 9 { + t.Fatalf("bad index: %d", idx) + } + + // Renaming to same value as first node should fail as well + // Adding another node with same name but different case should fail + in = &structs.Node{ + Node: "node1-renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // It should fail also with different case + in = &structs.Node{ + Node: "Node1-Renamed", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(10, in); err == nil { + t.Fatalf("err: %s", err) + } + + // But should work if names are different + in = &structs.Node{ + Node: "Node1-Renamed2", + ID: newNodeID, + Address: "1.1.1.7", + } + if err := s.EnsureNode(11, in); err != nil { + t.Fatalf("err: %s", err) + } + idx, out, err = s.GetNode("Node1-Renamed2") + if err != nil { + t.Fatalf("err: %s", err) + } + + // Node and indexes were updated + if out.ID != newNodeID || out.CreateIndex != 9 || out.ModifyIndex != 11 || out.Address != "1.1.1.7" || out.Node != "Node1-Renamed2" { + t.Fatalf("bad: %#v", out) + } + if idx != 11 { + t.Fatalf("bad index: %d", idx) + } + + // All the remaining tests are deprecated, please remove them on next Consul major release + // See https://github.com/hashicorp/consul/pull/3983 for context + + // Deprecated behavior is following + deprecatedEnsureNodeWithoutIDCanRegister(t, s, "new-node-without-id", 12) + + // Deprecated, but should work as well + deprecatedEnsureNodeWithoutIDCanRegister(t, s, "new-node-without-id", 13) + + // All of this is deprecated as well, should be removed + in = &structs.Node{ + Node: "Node1-Renamed2", + Address: "1.1.1.66", + } + if err := s.EnsureNode(14, in); err != nil { + t.Fatalf("[DEPRECATED] it should work, err:= %q", err) + } + idx, out, err = s.GetNode("Node1-Renamed2") + if err != nil { + t.Fatalf("[DEPRECATED] err: %s", err) + } + if out.CreateIndex != 9 { + t.Fatalf("[DEPRECATED] We expected to modify node previously added, but add index = %d for node %q", out.CreateIndex, out) + } + if out.Address != "1.1.1.66" || out.ModifyIndex != 14 { + t.Fatalf("[DEPRECATED] Node with newNodeID should have been updated, but was: %d with content := %q", out.CreateIndex, out) } }