tests: deflake TestMonitor_Monitor_RemoteServer and cross-region tests

Ensure that all servers are joined to each other before the test proceeds,
instead of just joining them to the first server and relying on
background serf propagation.

Relying on background serf propagation is a cause of flakiness,
especially for tests with multiple regions. The server receiving the RPC
may not be aware of the region and fail to forward RPC accordingly.

For example, consider the `TestMonitor_Monitor_RemoteServer` failure in https://app.circleci.com/pipelines/github/hashicorp/nomad/16402/workflows/7f327235-7d0c-40ba-9757-600522afca51/jobs/158045, where you can observe:
* `nomad-117` is joined to `nomad-118` and `nomad-119`
* `nomad-119` is the foreign region
* `nomad-117` gains leadership in the default region, `nomad-118` is the non-leader
* search logs for `nomad: adding server` and notice that `nomad-118`
  only added `nomad-117` and `nomad-118`, but not `nomad-119`!
* so the query to the non-leader in the test fails to be forwarded to
  the appropriate region.
This commit is contained in:
Mahmood Ali 2021-06-08 13:47:04 -04:00
parent 071c556b3d
commit 8009d9837c
2 changed files with 14 additions and 9 deletions

View File

@ -151,7 +151,7 @@ func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) {
// Join the servers to s1, and wait until they are all promoted to
// voters.
TestJoin(t, s1, servers[1:]...)
TestJoin(t, servers...)
retry.Run(t, func(r *retry.R) {
r.Check(wantRaft(servers))
for _, s := range servers {

View File

@ -168,14 +168,19 @@ func TestServer(t testing.T, cb func(*Config)) (*Server, func()) {
return nil, nil
}
func TestJoin(t testing.T, s1 *Server, other ...*Server) {
func TestJoin(t testing.T, servers ...*Server) {
for i := 0; i < len(servers)-1; i++ {
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfConfig.MemberlistConfig.BindPort)
for _, s2 := range other {
if num, err := s2.Join([]string{addr}); err != nil {
servers[i].config.SerfConfig.MemberlistConfig.BindPort)
for j := i + 1; j < len(servers); j++ {
num, err := servers[j].Join([]string{addr})
if err != nil {
t.Fatalf("err: %v", err)
} else if num != 1 {
}
if num != 1 {
t.Fatalf("bad: %d", num)
}
}
}
}