From 1274aa690fca098fddaaa0856262381d85f746ce Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 24 Feb 2022 16:34:56 -0600 Subject: [PATCH] tests: deflake test that joins a server with non-voting servers to form quorum This PR - upgrades the serf library - has the test start the join process using the un-joined server first - disables schedulers on the servers - uses the WaitForLeader and wantPeers helpers Not sure which, if any, of these actually reduces the flakiness of this test. --- .changelog/12130.txt | 3 ++ go.mod | 6 ++-- go.sum | 12 +++++-- nomad/rpc.go | 6 ++-- nomad/serf_test.go | 85 ++++++++++++++------------------------------ 5 files changed, 43 insertions(+), 69 deletions(-) create mode 100644 .changelog/12130.txt diff --git a/.changelog/12130.txt b/.changelog/12130.txt new file mode 100644 index 000000000..0f8548b53 --- /dev/null +++ b/.changelog/12130.txt @@ -0,0 +1,3 @@ +```release-note:improvement +deps: Update serf library to v0.9.7 +``` diff --git a/go.mod b/go.mod index 671a0c463..3e522bd89 100644 --- a/go.mod +++ b/go.mod @@ -78,7 +78,7 @@ require ( github.com/hashicorp/nomad/api v0.0.0-20200529203653-c4416b26d3eb github.com/hashicorp/raft v1.3.5 github.com/hashicorp/raft-boltdb/v2 v2.2.0 - github.com/hashicorp/serf v0.9.5 + github.com/hashicorp/serf v0.9.7 github.com/hashicorp/vault/api v1.0.5-0.20200805123347-1ef507638af6 github.com/hashicorp/vault/sdk v0.2.0 github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d @@ -86,7 +86,7 @@ require ( github.com/kr/pretty v0.3.0 github.com/kr/text v0.2.0 github.com/mattn/go-colorable v0.1.9 - github.com/miekg/dns v1.1.26 + github.com/miekg/dns v1.1.41 github.com/mitchellh/cli v1.1.2 github.com/mitchellh/colorstring v0.0.0-20150917214807-8631ce90f286 github.com/mitchellh/copystructure v1.2.0 @@ -205,7 +205,7 @@ require ( github.com/hashicorp/go-secure-stdlib/reloadutil v0.1.1 // indirect github.com/hashicorp/go-secure-stdlib/strutil v0.1.1 // indirect 
github.com/hashicorp/go-secure-stdlib/tlsutil v0.1.1 // indirect - github.com/hashicorp/mdns v1.0.1 // indirect + github.com/hashicorp/mdns v1.0.4 // indirect github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 // indirect github.com/huandu/xstrings v1.3.2 // indirect github.com/imdario/mergo v0.3.12 // indirect diff --git a/go.sum b/go.sum index 75a8d90dd..9a472d9f0 100644 --- a/go.sum +++ b/go.sum @@ -772,10 +772,12 @@ github.com/hashicorp/hil v0.0.0-20160711231837-1e86c6b523c5/go.mod h1:KHvg/R2/dP github.com/hashicorp/logutils v1.0.0 h1:dLEQVugN8vlakKOUE3ihGLTZJRB4j+M2cdTm/ORI65Y= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/mdns v1.0.1 h1:XFSOubp8KWB+Jd2PDyaX5xUd5bhSP/+pTDZVDMzZJM8= github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= +github.com/hashicorp/mdns v1.0.4 h1:sY0CMhFmjIPDMlTB+HfymFHCaYLhgifZ0QhjaYKD/UQ= +github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= +github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/memberlist v0.3.1 h1:MXgUXLqva1QvpVEDQW1IQLG0wivQAtmFlHRQ+1vWZfM= github.com/hashicorp/memberlist v0.3.1/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69 h1:lc3c72qGlIMDqQpQH82Y4vaglRMMFdJbziYWriR4UcE= @@ -793,8 +795,9 @@ github.com/hashicorp/raft-boltdb/v2 v2.2.0/go.mod h1:SgPUD5TP20z/bswEr210SnkUFvQ github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.9.3/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= 
github.com/hashicorp/serf v0.9.4/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= -github.com/hashicorp/serf v0.9.5 h1:EBWvyu9tcRszt3Bxp3KNssBMP1KuHWyO51lz9+786iM= github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= +github.com/hashicorp/serf v0.9.7 h1:hkdgbqizGQHuU5IPqYM1JdSMV8nKfpuOnZYXssk9muY= +github.com/hashicorp/serf v0.9.7/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hashicorp/vault/api v1.0.4/go.mod h1:gDcqh3WGcR1cpF5AJz/B1UFheUEneMoIospckxBxk6Q= github.com/hashicorp/vault/api v1.0.5-0.20190730042357-746c0b111519/go.mod h1:i9PKqwFko/s/aihU1uuHGh/FaQS+Xcgvd9dvnfAvQb0= github.com/hashicorp/vault/api v1.0.5-0.20200519221902-385fac77e20f/go.mod h1:euTFbi2YJgwcju3imEt919lhJKF68nN1cQPq3aA+kBE= @@ -927,8 +930,9 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5 github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.1.26 h1:gPxPSwALAeHJSjarOs00QjVdV9QoBvc1D2ujQUr5BzU= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= +github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY= +github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= @@ -1422,6 +1426,7 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net 
v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= @@ -1545,6 +1550,7 @@ golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/nomad/rpc.go b/nomad/rpc.go index 0bb4ee6f0..331e56631 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -7,20 +7,18 @@ import ( "errors" "fmt" "io" + golog "log" "math/rand" "net" "net/rpc" "strings" "time" - golog "log" - metrics "github.com/armon/go-metrics" + "github.com/hashicorp/consul/lib" "github.com/hashicorp/go-connlimit" log "github.com/hashicorp/go-hclog" memdb 
"github.com/hashicorp/go-memdb" - - "github.com/hashicorp/consul/lib" "github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/nomad/helper/pool" "github.com/hashicorp/nomad/nomad/state" diff --git a/nomad/serf_test.go b/nomad/serf_test.go index ad4e90b50..7444368b1 100644 --- a/nomad/serf_test.go +++ b/nomad/serf_test.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" + "github.com/hashicorp/serf/testutil/retry" "github.com/stretchr/testify/require" ) @@ -273,106 +274,72 @@ func TestNomad_BootstrapExpect(t *testing.T) { func TestNomad_BootstrapExpect_NonVoter(t *testing.T) { t.Parallel() - dir := tmpDir(t) - defer os.RemoveAll(dir) + dir := t.TempDir() + // Create first server, non-voter s1, cleanupS1 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 c.DevMode = false c.DataDir = path.Join(dir, "node1") + c.NumSchedulers = 0 c.NonVoter = true }) defer cleanupS1() + + // Create second server, non-voter s2, cleanupS2 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 c.DevMode = false c.DataDir = path.Join(dir, "node2") + c.NumSchedulers = 0 c.NonVoter = true }) defer cleanupS2() + + // Create third server, voter s3, cleanupS3 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 c.DevMode = false c.DataDir = path.Join(dir, "node3") + c.NumSchedulers = 0 + c.NonVoter = false }) defer cleanupS3() + + // Join the three servers we have so far TestJoin(t, s1, s2, s3) - // Assert that we do not bootstrap + // Assert that we do not bootstrap, because BE=2, but only 1 voter testutil.AssertUntil(testutil.Timeout(time.Second), func() (bool, error) { _, p := s1.getLeader() if p != nil { return false, fmt.Errorf("leader %v", p) } - return true, nil }, func(err error) { t.Fatalf("should not have leader: %v", err) }) - // Add the fourth server that is a voter + // Create fourth server that is a voter s4, cleanupS4 := TestServer(t, func(c *Config) { c.BootstrapExpect = 2 
c.DevMode = false c.DataDir = path.Join(dir, "node4") + c.NumSchedulers = 0 + c.NonVoter = false }) defer cleanupS4() - TestJoin(t, s1, s2, s3, s4) - testutil.WaitForResult(func() (bool, error) { - // Retry the join to decrease flakiness - TestJoin(t, s1, s2, s3, s4) - peers, err := s1.numPeers() - if err != nil { - return false, err - } - if peers != 4 { - return false, fmt.Errorf("bad: %#v", peers) - } - peers, err = s2.numPeers() - if err != nil { - return false, err - } - if peers != 4 { - return false, fmt.Errorf("bad: %#v", peers) - } - peers, err = s3.numPeers() - if err != nil { - return false, err - } - if peers != 4 { - return false, fmt.Errorf("bad: %#v", peers) - } - peers, err = s4.numPeers() - if err != nil { - return false, err - } - if peers != 4 { - return false, fmt.Errorf("bad: %#v", peers) - } - - if len(s1.localPeers) != 4 { - return false, fmt.Errorf("bad: %#v", s1.localPeers) - } - if len(s2.localPeers) != 4 { - return false, fmt.Errorf("bad: %#v", s2.localPeers) - } - if len(s3.localPeers) != 4 { - return false, fmt.Errorf("bad: %#v", s3.localPeers) - } - if len(s4.localPeers) != 4 { - return false, fmt.Errorf("bad: %#v", s3.localPeers) - } - - _, p := s1.getLeader() - if p == nil { - return false, fmt.Errorf("no leader") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + // Join with fourth server (now have quorum) + // Start with 4th server for higher chance of success + TestJoin(t, s4, s3, s2, s1) + // Assert leadership with 4 peers + servers := []*Server{s1, s2, s3, s4} + for _, s := range servers { + testutil.WaitForLeader(t, s.RPC) + retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 4)) }) + } } func TestNomad_BadExpect(t *testing.T) {