From c18c068bf21bb2797abcbf07a79781fb50fdc70d Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Thu, 4 Jun 2015 13:02:39 +0200 Subject: [PATCH] nomad: testing remove peer --- nomad/serf.go | 59 ++++++++++++++---------------- nomad/serf_test.go | 83 +++++++++++++++++++++++++++++++++++++++---- nomad/server.go | 1 + nomad/server_test.go | 1 + testutil/wait.go | 24 +++++++++++++ testutil/wait_test.go | 1 + 6 files changed, 129 insertions(+), 40 deletions(-) create mode 100644 testutil/wait.go create mode 100644 testutil/wait_test.go diff --git a/nomad/serf.go b/nomad/serf.go index f8b0600a4..06c936c67 100644 --- a/nomad/serf.go +++ b/nomad/serf.go @@ -123,41 +123,34 @@ func (s *Server) maybeBootstrap() { // nodeFailed is used to handle fail events on the serf cluster func (s *Server) nodeFailed(me serf.MemberEvent) { - //for _, m := range me.Members { - // ok, parts := isConsulServer(m) - // if !ok { - // continue - // } - // s.logger.Printf("[INFO] consul: removing server %s", parts) + for _, m := range me.Members { + ok, parts := isNomadServer(m) + if !ok { + continue + } + s.logger.Printf("[INFO] nomad: removing server %s", parts) - // // Remove the server if known - // s.remoteLock.Lock() - // existing := s.remoteConsuls[parts.Datacenter] - // n := len(existing) - // for i := 0; i < n; i++ { - // if existing[i].Name == parts.Name { - // existing[i], existing[n-1] = existing[n-1], nil - // existing = existing[:n-1] - // n-- - // break - // } - // } + // Remove the server if known + s.peerLock.Lock() + existing := s.peers[parts.Region] + n := len(existing) + for i := 0; i < n; i++ { + if existing[i].Name == parts.Name { + existing[i], existing[n-1] = existing[n-1], nil + existing = existing[:n-1] + n-- + break + } + } - // // Trim the list if all known consuls are dead - // if n == 0 { - // delete(s.remoteConsuls, parts.Datacenter) - // } else { - // s.remoteConsuls[parts.Datacenter] = existing - // } - // s.remoteLock.Unlock() - - // // Remove from the local list as well - // if !wan { - // s.localLock.Lock() - // delete(s.localConsuls, parts.Addr.String()) - // s.localLock.Unlock() - // } - //} + // Trim the list there are no known servers in a region + if n == 0 { + delete(s.peers, parts.Region) + } else { + s.peers[parts.Region] = existing + } + s.peerLock.Unlock() + } } // localMemberEvent is used to reconcile Serf events with the diff --git a/nomad/serf_test.go b/nomad/serf_test.go index 5b19e9e91..bc1629888 100644 --- a/nomad/serf_test.go +++ b/nomad/serf_test.go @@ -3,11 +3,17 @@ package nomad import ( "fmt" "testing" + + "github.com/hashicorp/nomad/testutil" ) func TestNomad_JoinPeer(t *testing.T) { s1 := testServer(t, nil) - s2 := testServer(t, nil) + defer s1.Shutdown() + s2 := testServer(t, func(c *Config) { + c.Region = "region2" + }) + defer s2.Shutdown() s2Addr := fmt.Sprintf("127.0.0.1:%d", s2.config.SerfConfig.MemberlistConfig.BindPort) num, err := s1.Join([]string{s2Addr}) @@ -18,10 +24,73 @@ func TestNomad_JoinPeer(t *testing.T) { t.Fatalf("bad: %d", num) } - if members := s1.Members(); len(members) != 2 { - t.Fatalf("bad: %#v", members) - } - if members := s2.Members(); len(members) != 2 { - t.Fatalf("bad: %#v", members) - } + testutil.WaitForResult(func() (bool, error) { + if members := s1.Members(); len(members) != 2 { + return false, fmt.Errorf("bad: %#v", members) + } + if members := s2.Members(); len(members) != 2 { + return false, fmt.Errorf("bad: %#v", members) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + if len(s1.peers) != 2 { + return false, fmt.Errorf("bad: %#v", s1.peers) + } + if len(s2.peers) != 2 { + return false, fmt.Errorf("bad: %#v", s2.peers) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + +func TestNomad_RemovePeer(t *testing.T) { + s1 := testServer(t, nil) + defer s1.Shutdown() + s2 := testServer(t, func(c *Config) { + c.Region = "region2" + }) + defer s2.Shutdown() + s2Addr := fmt.Sprintf("127.0.0.1:%d", s2.config.SerfConfig.MemberlistConfig.BindPort) + + num, err := s1.Join([]string{s2Addr}) + if err != nil { + t.Fatalf("err: %v", err) + } + if num != 1 { + t.Fatalf("bad: %d", num) + } + + testutil.WaitForResult(func() (bool, error) { + if members := s1.Members(); len(members) != 2 { + return false, fmt.Errorf("bad: %#v", members) + } + if members := s2.Members(); len(members) != 2 { + return false, fmt.Errorf("bad: %#v", members) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Leave immediately + s2.Leave() + s2.Shutdown() + + testutil.WaitForResult(func() (bool, error) { + if len(s1.peers) != 1 { + return false, fmt.Errorf("bad: %#v", s1.peers) + } + if len(s2.peers) != 1 { + return false, fmt.Errorf("bad: %#v", s2.peers) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } diff --git a/nomad/server.go b/nomad/server.go index 9bd086b4d..dbf39a200 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -314,6 +314,7 @@ func (s *Server) setupRaft() error { log = store snap = raft.NewDiscardSnapshotStore() peers = &raft.StaticPeers{} + s.raftPeers = peers } else { // Create the base raft path diff --git a/nomad/server_test.go b/nomad/server_test.go index 2bb4b1e0c..0a5d302c0 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -28,6 +28,7 @@ func testServer(t *testing.T, cb func(*Config)) *Server { config.SerfConfig.MemberlistConfig.BindAddr = "127.0.0.1" config.SerfConfig.MemberlistConfig.BindPort = getPort() config.SerfConfig.MemberlistConfig.SuspicionMult = 2 + config.SerfConfig.MemberlistConfig.RetransmitMult = 2 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond diff --git a/testutil/wait.go b/testutil/wait.go new file mode 100644 index 000000000..bc3991fca --- /dev/null +++ b/testutil/wait.go @@ -0,0 +1,24 @@ +package testutil + +import "time" + +type testFn func() (bool, error) +type errorFn func(error) + +func WaitForResult(test testFn, error errorFn) { + retries := 1000 + + for retries > 0 { + time.Sleep(10 * time.Millisecond) + retries-- + + success, err := test() + if success { + return + } + + if retries == 0 { + error(err) + } + } +} diff --git a/testutil/wait_test.go b/testutil/wait_test.go new file mode 100644 index 000000000..110b2e6a7 --- /dev/null +++ b/testutil/wait_test.go @@ -0,0 +1 @@ +package testutil