Add expect bootstrap '-expect=n' mode.

This allows for us to automatically bootstrap a cluster of nodes after
'n' number of server nodes join. All servers must have the same 'n' set, or
they will fail to join the cluster; all servers will not join the peer set
until they hit 'n' server nodes.

If the raft commit index is not empty, '-expect=n' does nothing because it
thinks you've already bootstrapped.

Signed-off-by: Robert Xu <robxu9@gmail.com>
This commit is contained in:
Robert Xu 2014-06-16 17:36:12 -04:00
parent 0577ec21a8
commit 31c392813c
10 changed files with 316 additions and 24 deletions

View File

@ -2,15 +2,16 @@ package agent
import (
"fmt"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/serf/serf"
"io"
"log"
"net"
"os"
"strconv"
"sync"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/serf/serf"
)
/*
@ -171,6 +172,9 @@ func (a *Agent) consulConfig() *consul.Config {
if a.config.Bootstrap {
base.Bootstrap = true
}
if a.config.Expect != 0 {
base.Expect = a.config.Expect
}
if a.config.Protocol > 0 {
base.ProtocolVersion = uint8(a.config.Protocol)
}

View File

@ -3,10 +3,6 @@ package agent
import (
"flag"
"fmt"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/mitchellh/cli"
"io"
"net"
"os"
@ -16,6 +12,11 @@ import (
"strings"
"syscall"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/mitchellh/cli"
)
// gracefulTimeout controls how long we wait before forcefully terminating
@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config {
cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server")
cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode")
cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode")
cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)")
cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to")
@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config {
return nil
}
// Expect can only work when acting as a server
if config.Expect != 0 && !config.Server {
c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled")
return nil
}
// Expect & Bootstrap are mutually exclusive
if config.Expect != 0 && config.Bootstrap {
c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive")
return nil
}
// Warn if we are in expect mode
if config.Expect != 0 {
if config.Expect == 1 {
// just use bootstrap mode
c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.")
config.Expect = 0
config.Bootstrap = true
} else {
c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect))
}
}
// Warn if we are in bootstrap mode
if config.Bootstrap {
c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary")
@ -524,6 +550,7 @@ Options:
order.
-data-dir=path Path to a data directory to store agent state
-dc=east-aws Datacenter of the agent
-expect=0 Sets server to expect bootstrap mode.
-join=1.2.3.4 Address of an agent to join at start time.
Can be specified multiple times.
-log-level=info Log level of the agent.

View File

@ -4,8 +4,6 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"github.com/hashicorp/consul/consul"
"github.com/mitchellh/mapstructure"
"io"
"net"
"os"
@ -13,6 +11,9 @@ import (
"sort"
"strings"
"time"
"github.com/hashicorp/consul/consul"
"github.com/mitchellh/mapstructure"
)
// Ports is used to simplify the configuration by
@ -64,6 +65,10 @@ type Config struct {
// permits that node to elect itself leader
Bootstrap bool `mapstructure:"bootstrap"`
// Expect tries to automatically bootstrap the Consul cluster,
// by witholding peers until enough servers join.
Expect int `mapstructure:"expect"`
// Server controls if this agent acts like a Consul server,
// or merely as a client. Servers have more state, take part
// in leader election, etc.
@ -219,6 +224,7 @@ type dirEnts []os.FileInfo
func DefaultConfig() *Config {
return &Config{
Bootstrap: false,
Expect: 0,
Server: false,
Datacenter: consul.DefaultDC,
Domain: "consul.",
@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config {
if b.Bootstrap {
result.Bootstrap = true
}
if b.Expect != 0 {
result.Expect = b.Expect
}
if b.Datacenter != "" {
result.Datacenter = b.Datacenter
}

View File

@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) {
t.Fatalf("bad: %#v", config)
}
// Expect bootstrap
input = `{"server": true, "expect": 3}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if !config.Server {
t.Fatalf("bad: %#v", config)
}
if config.Expect != 3 {
t.Fatalf("bad: %#v", config)
}
// DNS setup
input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) {
func TestMergeConfig(t *testing.T) {
a := &Config{
Bootstrap: false,
Expect: 0,
Datacenter: "dc1",
DataDir: "/tmp/foo",
DNSRecursor: "127.0.0.1:1001",
@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) {
b := &Config{
Bootstrap: true,
Expect: 3,
Datacenter: "dc2",
DataDir: "/tmp/bar",
DNSRecursor: "127.0.0.2:1001",

View File

@ -44,6 +44,11 @@ type Config struct {
// other nodes being present
Bootstrap bool
// Expect mode is used to automatically bring up a collection of
// Consul servers. This can be used to automatically bring up a collection
// of nodes.
Expect int
// Datacenter is the datacenter this Consul server represents
Datacenter string

View File

@ -1,13 +1,14 @@
package consul
import (
"net"
"strconv"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"net"
"strconv"
"time"
)
const (
@ -368,6 +369,57 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error {
}
}
// Or, check for possibility that expect is not the same.
if parts.Expect != 0 {
members := s.serfLAN.Members()
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect != parts.Expect {
s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name)
return nil
}
}
}
// If we're not a bootstrapped server, we're expecting servers,
// and our raft index is zero, try to auto bootstrap.
if !s.config.Bootstrap && s.config.Expect != 0 {
if index, _ := s.raftStore.LastIndex(); index == 0 {
// do not do standard op and add peer... yet
count := 0
members := s.serfLAN.Members()
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect == parts.Expect {
count++
if count >= s.config.Expect {
break
}
}
}
if count >= s.config.Expect {
// we've met expected limit - add servers
s.config.RaftConfig.EnableSingleNode = false
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect != parts.Expect {
addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port}
future := s.raft.AddPeer(addAddr)
if err := future.Error(); err != nil && err != raft.ErrKnownPeer {
s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err)
// hmm....
}
}
}
} else {
// not enough servers yet
return nil
}
}
}
// Attempt to add as a peer
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port}
future := s.raft.AddPeer(addr)

View File

@ -4,9 +4,6 @@ import (
"crypto/tls"
"errors"
"fmt"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-mdb"
"github.com/hashicorp/serf/serf"
"log"
"net"
"net/rpc"
@ -17,6 +14,10 @@ import (
"strconv"
"sync"
"time"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-mdb"
"github.com/hashicorp/serf/serf"
)
// These are the protocol versions that Consul can _understand_. These are
@ -233,6 +234,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
if s.config.Bootstrap {
conf.Tags["bootstrap"] = "1"
}
conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect)
conf.MemberlistConfig.LogOutput = s.config.LogOutput
conf.LogOutput = s.config.LogOutput
conf.EventCh = ch
@ -252,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
// setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error {
// If we are in bootstrap mode, enable a single node cluster
if s.config.Bootstrap {
// If we are in bootstrap or expect mode, enable a single node cluster
if s.config.Bootstrap || s.config.Expect != 0 {
s.config.RaftConfig.EnableSingleNode = true
}

View File

@ -3,12 +3,13 @@ package consul
import (
"errors"
"fmt"
"github.com/hashicorp/consul/testutil"
"io/ioutil"
"net"
"os"
"testing"
"time"
"github.com/hashicorp/consul/testutil"
)
var nextPort = 15000
@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se
return dir, server
}
func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) {
name := fmt.Sprintf("Node %d", getPort())
dir, config := testServerConfig(t, name)
config.Datacenter = dc
config.Bootstrap = false
config.Expect = expect
server, err := NewServer(config)
if err != nil {
t.Fatalf("err: %v", err)
}
return dir, server
}
func TestServer_StartStop(t *testing.T) {
dir := tmpDir(t)
defer os.RemoveAll(dir)
@ -304,3 +318,145 @@ func TestServer_JoinLAN_TLS(t *testing.T) {
t.Fatalf("no peer established")
})
}
func TestServer_Expect(t *testing.T) {
// all test servers should be in expect=3 mode
dir1, s1 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, s2 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir2)
defer s2.Shutdown()
dir3, s3 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir3)
defer s3.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := s2.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p1 []net.Addr
var p2 []net.Addr
// should have no peers yet
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
// join the third node
if _, err := s3.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p3 []net.Addr
// should now have all three peers
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p3, _ = s3.raftPeers.Peers()
return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
}
func TestServer_BadExpect(t *testing.T) {
// this one is in expect=3 mode
dir1, s1 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
// this one is in expect=2 mode
dir2, s2 := testServerDCExpect(t, "dc1", 2)
defer os.RemoveAll(dir2)
defer s2.Shutdown()
// and this one is in expect=3 mode
dir3, s3 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir3)
defer s3.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := s2.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p1 []net.Addr
var p2 []net.Addr
// should have no peers yet
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
// join the third node
if _, err := s3.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p3 []net.Addr
// should still have no peers (because s2 is in expect=2 mode)
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p3, _ = s3.raftPeers.Peers()
return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
}

View File

@ -4,12 +4,13 @@ import (
crand "crypto/rand"
"encoding/binary"
"fmt"
"github.com/hashicorp/serf/serf"
"net"
"os"
"path/filepath"
"runtime"
"strconv"
"github.com/hashicorp/serf/serf"
)
/*
@ -26,6 +27,7 @@ type serverParts struct {
Datacenter string
Port int
Bootstrap bool
Expect int
Version int
Addr net.Addr
}
@ -84,6 +86,12 @@ func isConsulServer(m serf.Member) (bool, *serverParts) {
datacenter := m.Tags["dc"]
_, bootstrap := m.Tags["bootstrap"]
expect_str := m.Tags["expect"]
expect, err := strconv.Atoi(expect_str)
if err != nil {
return false, nil
}
port_str := m.Tags["port"]
port, err := strconv.Atoi(port_str)
if err != nil {
@ -103,6 +111,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) {
Datacenter: datacenter,
Port: port,
Bootstrap: bootstrap,
Expect: expect,
Addr: addr,
Version: vsn,
}

View File

@ -1,10 +1,11 @@
package consul
import (
"github.com/hashicorp/serf/serf"
"net"
"regexp"
"testing"
"github.com/hashicorp/serf/serf"
)
func TestStrContains(t *testing.T) {
@ -40,10 +41,11 @@ func TestIsConsulServer(t *testing.T) {
Name: "foo",
Addr: net.IP([]byte{127, 0, 0, 1}),
Tags: map[string]string{
"role": "consul",
"dc": "east-aws",
"port": "10000",
"vsn": "1",
"expect": "0",
"role": "consul",
"dc": "east-aws",
"port": "10000",
"vsn": "1",
},
}
valid, parts := isConsulServer(m)
@ -56,6 +58,9 @@ func TestIsConsulServer(t *testing.T) {
if parts.Bootstrap {
t.Fatalf("unexpected bootstrap")
}
if parts.Expect != 0 {
t.Fatalf("bad: %v", parts.Expect)
}
m.Tags["bootstrap"] = "1"
valid, parts = isConsulServer(m)
if !valid || !parts.Bootstrap {
@ -67,6 +72,12 @@ func TestIsConsulServer(t *testing.T) {
if parts.Version != 1 {
t.Fatalf("bad: %v", parts)
}
m.Tags["expect"] = "3"
delete(m.Tags, "bootstrap")
valid, parts = isConsulServer(m)
if !valid || parts.Expect != 3 {
t.Fatalf("bad: %v", parts.Expect)
}
}
func TestIsConsulNode(t *testing.T) {