f8884d8b52
Fixes #16517

Given a 3 Server cluster with at least 1 Client connected to Follower 1:

If a NodeMeta.{Apply,Read} request for the Client is received by Follower 1
with `AllowStale = false`, the Follower will forward the request to the
Leader. The Leader, not being connected to the target Client, will forward
the RPC back to Follower 1. Follower 1, seeing AllowStale=false, will forward
the request to the Leader. The Leader, not being connected to... well,
hopefully you get the picture: an infinite loop occurs.
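One way to break such a ping-pong is to tag the request the first time it is
routed toward the server holding the node connection, so the receiving server
answers instead of forwarding again. Below is a minimal, self-contained sketch
of that idea. It is an illustration only, not Nomad's actual forwarding code:
Request, Server, Handle, and the Forwarded field are all hypothetical.

package main

import "fmt"

// Request mimics an RPC with Nomad-style QueryOptions.AllowStale semantics.
// Forwarded is a hypothetical guard field, not a real Nomad field.
type Request struct {
	AllowStale bool
	Forwarded  bool
}

// Server is a toy stand-in for a Nomad server.
type Server struct {
	name        string
	isLeader    bool
	hasNodeConn bool    // whether this server holds the connection to the target Client
	peer        *Server // where this server forwards requests it cannot serve
}

// Handle serves req locally or forwards it, counting hops so a bug cannot
// actually loop forever in this sketch.
func (s *Server) Handle(req *Request, depth int) string {
	if depth > 4 {
		return "infinite forwarding loop (the #16517 failure mode)"
	}
	switch {
	case !s.isLeader && !req.AllowStale && !req.Forwarded:
		// A Follower honoring AllowStale=false forwards to the Leader.
		return s.peer.Handle(req, depth+1)
	case s.isLeader && !s.hasNodeConn:
		// The Leader has no connection to the target Client, so it forwards
		// to the Follower that does. Marking the request forwarded lets that
		// Follower serve it instead of bouncing it back; remove these two
		// Forwarded lines and depth climbs until the hop limit trips.
		req.Forwarded = true
		return s.peer.Handle(req, depth+1)
	default:
		return "served by " + s.name
	}
}

func main() {
	leader := &Server{name: "leader", isLeader: true}
	follower := &Server{name: "follower1", hasNodeConn: true, peer: leader}
	leader.peer = follower

	fmt.Println(follower.Handle(&Request{AllowStale: false}, 0))
	// Output: served by follower1
}

The test added by this commit follows.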
package nomad

import (
	"testing"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/shoenig/test/must"
)

// TestNodeMeta_Forward asserts that Client RPCs do not result in infinite
// loops. For example in a cluster with 1 Leader, 2 Followers, and a Node
// connected to Follower 1:
//
// If a NodeMeta.Apply RPC with AllowStale=false is received by Follower 1, it
// will honor AllowStale=false and forward the request to the Leader.
//
// The Leader will accept the RPC, notice that Follower 1 has a connection to
// the Node, and the Leader will send the request back to Follower 1.
//
// Follower 1, ever respectful of AllowStale=false, will forward it back to the
// Leader.
//
// The Leader, being unable to forward to the Node, will send it back to
// Follower 1.
//
// This argument will continue until one of the Servers runs out of memory or
// patience and stomps away in anger (crashes). Like any good argument the
// ending is never pretty as the Servers will suffer CPU starvation and
// potentially Raft flapping before anyone actually OOMs.
//
// See https://github.com/hashicorp/nomad/issues/16517 for details.
//
// If the test fails it will do so spectacularly by consuming all available
// CPU and potentially all available memory. Running it in a VM or container
// is suggested.
func TestNodeMeta_Forward(t *testing.T) {
	ci.Parallel(t)

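	// Start a 3 server cluster. Schedulers are disabled since this test runs
	// no jobs.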
	servers := []*Server{}
	for i := 0; i < 3; i++ {
		s, cleanup := TestServer(t, func(c *Config) {
			c.BootstrapExpect = 3
			c.NumSchedulers = 0
		})
		t.Cleanup(cleanup)
		servers = append(servers, s)
	}

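	// Join the servers and wait for a stable leader.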
	TestJoin(t, servers...)
	leader := testutil.WaitForLeaders(t, servers[0].RPC, servers[1].RPC, servers[2].RPC)

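	// Determine which servers are followers so the clients below can be
	// pointed at them rather than the leader.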
	followers := []string{}
	for _, s := range servers {
		if addr := s.config.RPCAddr.String(); addr != leader {
			followers = append(followers, addr)
		}
	}
	t.Logf("leader=%s followers=%q", leader, followers)

	clients := []*client.Client{}
	for i := 0; i < 4; i++ {
		c, cleanup := client.TestClient(t, func(c *config.Config) {
			// Clients will rebalance across all servers, but try to get them
			// to use followers to ensure we don't hit the loop in #16517
			c.Servers = followers
		})
		defer cleanup()
		clients = append(clients, c)
	}

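	// Wait for every client to register with the servers before issuing any
	// RPCs.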
	for _, c := range clients {
		testutil.WaitForClient(t, servers[0].RPC, c.NodeID(), c.Region())
	}

	agentRPCs := []func(string, any, any) error{}
	nodeIDs := make([]string, 0, len(clients))

	// Build list of agents and node IDs
	for _, s := range servers {
		agentRPCs = append(agentRPCs, s.RPC)
	}

	for _, c := range clients {
		agentRPCs = append(agentRPCs, c.RPC)
		nodeIDs = append(nodeIDs, c.NodeID())
	}

	region := clients[0].Region()

	// Apply metadata to every client through every agent to ensure forwarding
	// always works regardless of path taken.
	for _, rpc := range agentRPCs {
		for _, nodeID := range nodeIDs {
			args := &structs.NodeMetaApplyRequest{
				// Intentionally don't set QueryOptions.AllowStale to exercise #16517
				QueryOptions: structs.QueryOptions{
					Region: region,
				},
				NodeID: nodeID,
				Meta:   map[string]*string{"testing": pointer.Of("123")},
			}
			reply := &structs.NodeMetaResponse{}
			must.NoError(t, rpc("NodeMeta.Apply", args, reply))
			must.MapNotEmpty(t, reply.Meta)
		}
	}

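	// Read the metadata back through every agent to exercise the
	// NodeMeta.Read forwarding path as well.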
	for _, rpc := range agentRPCs {
		for _, nodeID := range nodeIDs {
			args := &structs.NodeSpecificRequest{
				// Intentionally don't set QueryOptions.AllowStale to exercise #16517
				QueryOptions: structs.QueryOptions{
					Region: region,
				},
				NodeID: nodeID,
			}
			reply := &structs.NodeMetaResponse{}
			must.NoError(t, rpc("NodeMeta.Read", args, reply))
			must.MapNotEmpty(t, reply.Meta)
			must.Eq(t, reply.Meta["testing"], "123")
			must.MapNotEmpty(t, reply.Dynamic)
			must.Eq(t, *reply.Dynamic["testing"], "123")
		}
	}
}