From 31c392813caf55717600852dfaae8aecc995dc34 Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Mon, 16 Jun 2014 17:36:12 -0400 Subject: [PATCH] Add expect bootstrap '-expect=n' mode. This allows for us to automatically bootstrap a cluster of nodes after 'n' number of server nodes join. All servers must have the same 'n' set, or they will fail to join the cluster; all servers will not join the peer set until they hit 'n' server nodes. If the raft commit index is not empty, '-expect=n' does nothing because it thinks you've already bootstrapped. Signed-off-by: Robert Xu --- command/agent/agent.go | 10 ++- command/agent/command.go | 35 +++++++- command/agent/config.go | 13 ++- command/agent/config_test.go | 17 ++++ consul/config.go | 5 ++ consul/leader.go | 58 ++++++++++++- consul/server.go | 12 +-- consul/server_test.go | 158 ++++++++++++++++++++++++++++++++++- consul/util.go | 11 ++- consul/util_test.go | 21 +++-- 10 files changed, 316 insertions(+), 24 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 62784f8d6..4ee1a6f8f 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -2,15 +2,16 @@ package agent import ( "fmt" - "github.com/hashicorp/consul/consul" - "github.com/hashicorp/consul/consul/structs" - "github.com/hashicorp/serf/serf" "io" "log" "net" "os" "strconv" "sync" + + "github.com/hashicorp/consul/consul" + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/serf/serf" ) /* @@ -171,6 +172,9 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.Expect != 0 { + base.Expect = a.config.Expect + } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) } diff --git a/command/agent/command.go b/command/agent/command.go index 2c9c5e271..cdc833bc4 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -3,10 +3,6 @@ package agent import ( "flag" "fmt" - "github.com/armon/go-metrics" - "github.com/hashicorp/go-syslog" - "github.com/hashicorp/logutils" - "github.com/mitchellh/cli" "io" "net" "os" @@ -16,6 +12,11 @@ import ( "strings" "syscall" "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-syslog" + "github.com/hashicorp/logutils" + "github.com/mitchellh/cli" ) // gracefulTimeout controls how long we wait before forcefully terminating @@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config { cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") + cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") @@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config { return nil } + // Expect can only work when acting as a server + if config.Expect != 0 && !config.Server { + c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled") + return nil + } + + // Expect & Bootstrap are mutually exclusive + if config.Expect != 0 && config.Bootstrap { + c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive") + return nil + } + + // Warn if we are in expect mode + if config.Expect != 0 { + if config.Expect == 1 { + // just use bootstrap mode + c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.") + config.Expect = 0 + config.Bootstrap = true + } else { + c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect)) + } + } + // Warn if we are in bootstrap mode if config.Bootstrap { c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary") @@ -524,6 +550,7 @@ Options: order. -data-dir=path Path to a data directory to store agent state -dc=east-aws Datacenter of the agent + -expect=0 Sets server to expect bootstrap mode. -join=1.2.3.4 Address of an agent to join at start time. Can be specified multiple times. -log-level=info Log level of the agent. diff --git a/command/agent/config.go b/command/agent/config.go index a5a2bc452..c3631429a 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -4,8 +4,6 @@ import ( "encoding/base64" "encoding/json" "fmt" - "github.com/hashicorp/consul/consul" - "github.com/mitchellh/mapstructure" "io" "net" "os" @@ -13,6 +11,9 @@ import ( "sort" "strings" "time" + + "github.com/hashicorp/consul/consul" + "github.com/mitchellh/mapstructure" ) // Ports is used to simplify the configuration by @@ -64,6 +65,10 @@ type Config struct { // permits that node to elect itself leader Bootstrap bool `mapstructure:"bootstrap"` + // Expect tries to automatically bootstrap the Consul cluster, + // by witholding peers until enough servers join. + Expect int `mapstructure:"expect"` + // Server controls if this agent acts like a Consul server, // or merely as a client. Servers have more state, take part // in leader election, etc. @@ -219,6 +224,7 @@ type dirEnts []os.FileInfo func DefaultConfig() *Config { return &Config{ Bootstrap: false, + Expect: 0, Server: false, Datacenter: consul.DefaultDC, Domain: "consul.", @@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config { if b.Bootstrap { result.Bootstrap = true } + if b.Expect != 0 { + result.Expect = b.Expect + } if b.Datacenter != "" { result.Datacenter = b.Datacenter } diff --git a/command/agent/config_test.go b/command/agent/config_test.go index b1c83d479..0225630d0 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) { t.Fatalf("bad: %#v", config) } + // Expect bootstrap + input = `{"server": true, "expect": 3}` + config, err = DecodeConfig(bytes.NewReader([]byte(input))) + if err != nil { + t.Fatalf("err: %s", err) + } + + if !config.Server { + t.Fatalf("bad: %#v", config) + } + + if config.Expect != 3 { + t.Fatalf("bad: %#v", config) + } + // DNS setup input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) @@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) { func TestMergeConfig(t *testing.T) { a := &Config{ Bootstrap: false, + Expect: 0, Datacenter: "dc1", DataDir: "/tmp/foo", DNSRecursor: "127.0.0.1:1001", @@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) { b := &Config{ Bootstrap: true, + Expect: 3, Datacenter: "dc2", DataDir: "/tmp/bar", DNSRecursor: "127.0.0.2:1001", diff --git a/consul/config.go b/consul/config.go index 6000177a8..ae6c48282 100644 --- a/consul/config.go +++ b/consul/config.go @@ -44,6 +44,11 @@ type Config struct { // other nodes being present Bootstrap bool + // Expect mode is used to automatically bring up a collection of + // Consul servers. This can be used to automatically bring up a collection + // of nodes. + Expect int + // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/leader.go b/consul/leader.go index d09f11185..8cbc84273 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -1,13 +1,14 @@ package consul import ( + "net" + "strconv" + "time" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" - "net" - "strconv" - "time" ) const ( @@ -368,6 +369,57 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error { } } + // Or, check for possibility that expect is not the same. + if parts.Expect != 0 { + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name) + return nil + } + } + } + + // If we're not a bootstrapped server, we're expecting servers, + // and our raft index is zero, try to auto bootstrap. + if !s.config.Bootstrap && s.config.Expect != 0 { + if index, _ := s.raftStore.LastIndex(); index == 0 { + // do not do standard op and add peer... yet + count := 0 + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect == parts.Expect { + count++ + if count >= s.config.Expect { + break + } + } + } + + if count >= s.config.Expect { + // we've met expected limit - add servers + s.config.RaftConfig.EnableSingleNode = false + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port} + future := s.raft.AddPeer(addAddr) + + if err := future.Error(); err != nil && err != raft.ErrKnownPeer { + s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err) + // hmm.... + } + } + } + } else { + // not enough servers yet + return nil + } + } + } + // Attempt to add as a peer var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} future := s.raft.AddPeer(addr) diff --git a/consul/server.go b/consul/server.go index e7dd195f3..91eafb19b 100644 --- a/consul/server.go +++ b/consul/server.go @@ -4,9 +4,6 @@ import ( "crypto/tls" "errors" "fmt" - "github.com/hashicorp/raft" - "github.com/hashicorp/raft-mdb" - "github.com/hashicorp/serf/serf" "log" "net" "net/rpc" @@ -17,6 +14,10 @@ import ( "strconv" "sync" "time" + + "github.com/hashicorp/raft" + "github.com/hashicorp/raft-mdb" + "github.com/hashicorp/serf/serf" ) // These are the protocol versions that Consul can _understand_. These are @@ -233,6 +234,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch @@ -252,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w // setupRaft is used to setup and initialize Raft func (s *Server) setupRaft() error { - // If we are in bootstrap mode, enable a single node cluster - if s.config.Bootstrap { + // If we are in bootstrap or expect mode, enable a single node cluster + if s.config.Bootstrap || s.config.Expect != 0 { s.config.RaftConfig.EnableSingleNode = true } diff --git a/consul/server_test.go b/consul/server_test.go index b8edc6ef4..a00f7f34a 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -3,12 +3,13 @@ package consul import ( "errors" "fmt" - "github.com/hashicorp/consul/testutil" "io/ioutil" "net" "os" "testing" "time" + + "github.com/hashicorp/consul/testutil" ) var nextPort = 15000 @@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se return dir, server } +func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { + name := fmt.Sprintf("Node %d", getPort()) + dir, config := testServerConfig(t, name) + config.Datacenter = dc + config.Bootstrap = false + config.Expect = expect + server, err := NewServer(config) + if err != nil { + t.Fatalf("err: %v", err) + } + return dir, server +} + func TestServer_StartStop(t *testing.T) { dir := tmpDir(t) defer os.RemoveAll(dir) @@ -304,3 +318,145 @@ func TestServer_JoinLAN_TLS(t *testing.T) { t.Fatalf("no peer established") }) } + +func TestServer_Expect(t *testing.T) { + // all test servers should be in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should now have all three peers + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + +} + +func TestServer_BadExpect(t *testing.T) { + // this one is in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + // this one is in expect=2 mode + dir2, s2 := testServerDCExpect(t, "dc1", 2) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + // and this one is in expect=3 mode + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should still have no peers (because s2 is in expect=2 mode) + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + +} diff --git a/consul/util.go b/consul/util.go index 402ecee73..dc5b6ef2a 100644 --- a/consul/util.go +++ b/consul/util.go @@ -4,12 +4,13 @@ import ( crand "crypto/rand" "encoding/binary" "fmt" - "github.com/hashicorp/serf/serf" "net" "os" "path/filepath" "runtime" "strconv" + + "github.com/hashicorp/serf/serf" ) /* @@ -26,6 +27,7 @@ type serverParts struct { Datacenter string Port int Bootstrap bool + Expect int Version int Addr net.Addr } @@ -84,6 +86,12 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] + expect_str := m.Tags["expect"] + expect, err := strconv.Atoi(expect_str) + if err != nil { + return false, nil + } + port_str := m.Tags["port"] port, err := strconv.Atoi(port_str) if err != nil { @@ -103,6 +111,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { Datacenter: datacenter, Port: port, Bootstrap: bootstrap, + Expect: expect, Addr: addr, Version: vsn, } diff --git a/consul/util_test.go b/consul/util_test.go index 65e5e99ed..e360f523c 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -1,10 +1,11 @@ package consul import ( - "github.com/hashicorp/serf/serf" "net" "regexp" "testing" + + "github.com/hashicorp/serf/serf" ) func TestStrContains(t *testing.T) { @@ -40,10 +41,11 @@ func TestIsConsulServer(t *testing.T) { Name: "foo", Addr: net.IP([]byte{127, 0, 0, 1}), Tags: map[string]string{ - "role": "consul", - "dc": "east-aws", - "port": "10000", - "vsn": "1", + "expect": "0", + "role": "consul", + "dc": "east-aws", + "port": "10000", + "vsn": "1", }, } valid, parts := isConsulServer(m) @@ -56,6 +58,9 @@ func TestIsConsulServer(t *testing.T) { if parts.Bootstrap { t.Fatalf("unexpected bootstrap") } + if parts.Expect != 0 { + t.Fatalf("bad: %v", parts.Expect) + } m.Tags["bootstrap"] = "1" valid, parts = isConsulServer(m) if !valid || !parts.Bootstrap { @@ -67,6 +72,12 @@ func TestIsConsulServer(t *testing.T) { if parts.Version != 1 { t.Fatalf("bad: %v", parts) } + m.Tags["expect"] = "3" + delete(m.Tags, "bootstrap") + valid, parts = isConsulServer(m) + if !valid || parts.Expect != 3 { + t.Fatalf("bad: %v", parts.Expect) + } } func TestIsConsulNode(t *testing.T) {