tests: deflake test that joins a server with non-voting servers to form qourum

This PR
 - upgrades the serf library
 - has the test start the join process using the un-joined server first
 - disables schedulers on the servers
 - uses the WaitForLeader and wantPeers helpers

Not sure which, if any of these actually improves the flakiness of this test.
This commit is contained in:
Seth Hoenig 2022-02-24 16:34:56 -06:00
parent 81521ca248
commit 1274aa690f
5 changed files with 43 additions and 69 deletions

3
.changelog/12130.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
deps: Update serf library to v0.9.7
```

6
go.mod
View File

@ -78,7 +78,7 @@ require (
github.com/hashicorp/nomad/api v0.0.0-20200529203653-c4416b26d3eb
github.com/hashicorp/raft v1.3.5
github.com/hashicorp/raft-boltdb/v2 v2.2.0
github.com/hashicorp/serf v0.9.5
github.com/hashicorp/serf v0.9.7
github.com/hashicorp/vault/api v1.0.5-0.20200805123347-1ef507638af6
github.com/hashicorp/vault/sdk v0.2.0
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d
@ -86,7 +86,7 @@ require (
github.com/kr/pretty v0.3.0
github.com/kr/text v0.2.0
github.com/mattn/go-colorable v0.1.9
github.com/miekg/dns v1.1.26
github.com/miekg/dns v1.1.41
github.com/mitchellh/cli v1.1.2
github.com/mitchellh/colorstring v0.0.0-20150917214807-8631ce90f286
github.com/mitchellh/copystructure v1.2.0
@ -205,7 +205,7 @@ require (
github.com/hashicorp/go-secure-stdlib/reloadutil v0.1.1 // indirect
github.com/hashicorp/go-secure-stdlib/strutil v0.1.1 // indirect
github.com/hashicorp/go-secure-stdlib/tlsutil v0.1.1 // indirect
github.com/hashicorp/mdns v1.0.1 // indirect
github.com/hashicorp/mdns v1.0.4 // indirect
github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 // indirect
github.com/huandu/xstrings v1.3.2 // indirect
github.com/imdario/mergo v0.3.12 // indirect

12
go.sum
View File

@ -772,10 +772,12 @@ github.com/hashicorp/hil v0.0.0-20160711231837-1e86c6b523c5/go.mod h1:KHvg/R2/dP
github.com/hashicorp/logutils v1.0.0 h1:dLEQVugN8vlakKOUE3ihGLTZJRB4j+M2cdTm/ORI65Y=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
github.com/hashicorp/mdns v1.0.1 h1:XFSOubp8KWB+Jd2PDyaX5xUd5bhSP/+pTDZVDMzZJM8=
github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY=
github.com/hashicorp/mdns v1.0.4 h1:sY0CMhFmjIPDMlTB+HfymFHCaYLhgifZ0QhjaYKD/UQ=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I=
github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/memberlist v0.3.1 h1:MXgUXLqva1QvpVEDQW1IQLG0wivQAtmFlHRQ+1vWZfM=
github.com/hashicorp/memberlist v0.3.1/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69 h1:lc3c72qGlIMDqQpQH82Y4vaglRMMFdJbziYWriR4UcE=
@ -793,8 +795,9 @@ github.com/hashicorp/raft-boltdb/v2 v2.2.0/go.mod h1:SgPUD5TP20z/bswEr210SnkUFvQ
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
github.com/hashicorp/serf v0.9.3/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.4/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.5 h1:EBWvyu9tcRszt3Bxp3KNssBMP1KuHWyO51lz9+786iM=
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.7 h1:hkdgbqizGQHuU5IPqYM1JdSMV8nKfpuOnZYXssk9muY=
github.com/hashicorp/serf v0.9.7/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q=
github.com/hashicorp/vault/api v1.0.5-0.20190730042357-746c0b111519/go.mod h1:i9PKqwFko/s/aihU1uuHGh/FaQS+Xcgvd9dvnfAvQb0=
github.com/hashicorp/vault/api v1.0.5-0.20200519221902-385fac77e20f/go.mod h1:euTFbi2YJgwcju3imEt919lhJKF68nN1cQPq3aA+kBE=
@ -927,8 +930,9 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.26 h1:gPxPSwALAeHJSjarOs00QjVdV9QoBvc1D2ujQUr5BzU=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
@ -1422,6 +1426,7 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
@ -1545,6 +1550,7 @@ golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@ -7,20 +7,18 @@ import (
"errors"
"fmt"
"io"
golog "log"
"math/rand"
"net"
"net/rpc"
"strings"
"time"
golog "log"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-connlimit"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/helper/pool"
"github.com/hashicorp/nomad/nomad/state"

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"github.com/hashicorp/serf/testutil/retry"
"github.com/stretchr/testify/require"
)
@ -273,106 +274,72 @@ func TestNomad_BootstrapExpect(t *testing.T) {
func TestNomad_BootstrapExpect_NonVoter(t *testing.T) {
t.Parallel()
dir := tmpDir(t)
defer os.RemoveAll(dir)
dir := t.TempDir()
// Create first server, non-voter
s1, cleanupS1 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.DevMode = false
c.DataDir = path.Join(dir, "node1")
c.NumSchedulers = 0
c.NonVoter = true
})
defer cleanupS1()
// Create second server, non-voter
s2, cleanupS2 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.DevMode = false
c.DataDir = path.Join(dir, "node2")
c.NumSchedulers = 0
c.NonVoter = true
})
defer cleanupS2()
// Create third server, non-voter
s3, cleanupS3 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.DevMode = false
c.DataDir = path.Join(dir, "node3")
c.NumSchedulers = 0
c.NonVoter = false
})
defer cleanupS3()
// Join the three servers we have so far
TestJoin(t, s1, s2, s3)
// Assert that we do not bootstrap
// Assert that we do not bootstrap, because BE=2, but only 1 voter
testutil.AssertUntil(testutil.Timeout(time.Second), func() (bool, error) {
_, p := s1.getLeader()
if p != nil {
return false, fmt.Errorf("leader %v", p)
}
return true, nil
}, func(err error) {
t.Fatalf("should not have leader: %v", err)
})
// Add the fourth server that is a voter
// Create fourth server that is a voter
s4, cleanupS4 := TestServer(t, func(c *Config) {
c.BootstrapExpect = 2
c.DevMode = false
c.DataDir = path.Join(dir, "node4")
c.NumSchedulers = 0
c.NonVoter = false
})
defer cleanupS4()
TestJoin(t, s1, s2, s3, s4)
testutil.WaitForResult(func() (bool, error) {
// Retry the join to decrease flakiness
TestJoin(t, s1, s2, s3, s4)
peers, err := s1.numPeers()
if err != nil {
return false, err
}
if peers != 4 {
return false, fmt.Errorf("bad: %#v", peers)
}
peers, err = s2.numPeers()
if err != nil {
return false, err
}
if peers != 4 {
return false, fmt.Errorf("bad: %#v", peers)
}
peers, err = s3.numPeers()
if err != nil {
return false, err
}
if peers != 4 {
return false, fmt.Errorf("bad: %#v", peers)
}
peers, err = s4.numPeers()
if err != nil {
return false, err
}
if peers != 4 {
return false, fmt.Errorf("bad: %#v", peers)
}
if len(s1.localPeers) != 4 {
return false, fmt.Errorf("bad: %#v", s1.localPeers)
}
if len(s2.localPeers) != 4 {
return false, fmt.Errorf("bad: %#v", s2.localPeers)
}
if len(s3.localPeers) != 4 {
return false, fmt.Errorf("bad: %#v", s3.localPeers)
}
if len(s4.localPeers) != 4 {
return false, fmt.Errorf("bad: %#v", s3.localPeers)
}
_, p := s1.getLeader()
if p == nil {
return false, fmt.Errorf("no leader")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
// Join with fourth server (now have quorum)
// Start with 4th server for higher chance of success
TestJoin(t, s4, s3, s2, s1)
// Assert leadership with 4 peers
servers := []*Server{s1, s2, s3, s4}
for _, s := range servers {
testutil.WaitForLeader(t, s.RPC)
retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 4)) })
}
}
func TestNomad_BadExpect(t *testing.T) {