Add expect bootstrap '-expect=n' mode.

This allows for us to automatically bootstrap a cluster of nodes after
'n' number of server nodes join. All servers must have the same 'n' set, or
they will fail to join the cluster; all servers will not join the peer set
until they hit 'n' server nodes.

If the raft commit index is not empty, '-expect=n' does nothing because it
thinks you've already bootstrapped.

Signed-off-by: Robert Xu <robxu9@gmail.com>
This commit is contained in:
Robert Xu 2014-06-16 17:36:12 -04:00
parent 0577ec21a8
commit 31c392813c
10 changed files with 316 additions and 24 deletions

View file

@ -2,15 +2,16 @@ package agent
import ( import (
"fmt" "fmt"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/serf/serf"
"io" "io"
"log" "log"
"net" "net"
"os" "os"
"strconv" "strconv"
"sync" "sync"
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/serf/serf"
) )
/* /*
@ -171,6 +172,9 @@ func (a *Agent) consulConfig() *consul.Config {
if a.config.Bootstrap { if a.config.Bootstrap {
base.Bootstrap = true base.Bootstrap = true
} }
if a.config.Expect != 0 {
base.Expect = a.config.Expect
}
if a.config.Protocol > 0 { if a.config.Protocol > 0 {
base.ProtocolVersion = uint8(a.config.Protocol) base.ProtocolVersion = uint8(a.config.Protocol)
} }

View file

@ -3,10 +3,6 @@ package agent
import ( import (
"flag" "flag"
"fmt" "fmt"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/mitchellh/cli"
"io" "io"
"net" "net"
"os" "os"
@ -16,6 +12,11 @@ import (
"strings" "strings"
"syscall" "syscall"
"time" "time"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/mitchellh/cli"
) )
// gracefulTimeout controls how long we wait before forcefully terminating // gracefulTimeout controls how long we wait before forcefully terminating
@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config {
cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server")
cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode")
cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode")
cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)")
cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to")
@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config {
return nil return nil
} }
// Expect can only work when acting as a server
if config.Expect != 0 && !config.Server {
c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled")
return nil
}
// Expect & Bootstrap are mutually exclusive
if config.Expect != 0 && config.Bootstrap {
c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive")
return nil
}
// Warn if we are in expect mode
if config.Expect != 0 {
if config.Expect == 1 {
// just use bootstrap mode
c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.")
config.Expect = 0
config.Bootstrap = true
} else {
c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect))
}
}
// Warn if we are in bootstrap mode // Warn if we are in bootstrap mode
if config.Bootstrap { if config.Bootstrap {
c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary") c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary")
@ -524,6 +550,7 @@ Options:
order. order.
-data-dir=path Path to a data directory to store agent state -data-dir=path Path to a data directory to store agent state
-dc=east-aws Datacenter of the agent -dc=east-aws Datacenter of the agent
-expect=0 Sets server to expect bootstrap mode.
-join=1.2.3.4 Address of an agent to join at start time. -join=1.2.3.4 Address of an agent to join at start time.
Can be specified multiple times. Can be specified multiple times.
-log-level=info Log level of the agent. -log-level=info Log level of the agent.

View file

@ -4,8 +4,6 @@ import (
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/hashicorp/consul/consul"
"github.com/mitchellh/mapstructure"
"io" "io"
"net" "net"
"os" "os"
@ -13,6 +11,9 @@ import (
"sort" "sort"
"strings" "strings"
"time" "time"
"github.com/hashicorp/consul/consul"
"github.com/mitchellh/mapstructure"
) )
// Ports is used to simplify the configuration by // Ports is used to simplify the configuration by
@ -64,6 +65,10 @@ type Config struct {
// permits that node to elect itself leader // permits that node to elect itself leader
Bootstrap bool `mapstructure:"bootstrap"` Bootstrap bool `mapstructure:"bootstrap"`
// Expect tries to automatically bootstrap the Consul cluster,
// by witholding peers until enough servers join.
Expect int `mapstructure:"expect"`
// Server controls if this agent acts like a Consul server, // Server controls if this agent acts like a Consul server,
// or merely as a client. Servers have more state, take part // or merely as a client. Servers have more state, take part
// in leader election, etc. // in leader election, etc.
@ -219,6 +224,7 @@ type dirEnts []os.FileInfo
func DefaultConfig() *Config { func DefaultConfig() *Config {
return &Config{ return &Config{
Bootstrap: false, Bootstrap: false,
Expect: 0,
Server: false, Server: false,
Datacenter: consul.DefaultDC, Datacenter: consul.DefaultDC,
Domain: "consul.", Domain: "consul.",
@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config {
if b.Bootstrap { if b.Bootstrap {
result.Bootstrap = true result.Bootstrap = true
} }
if b.Expect != 0 {
result.Expect = b.Expect
}
if b.Datacenter != "" { if b.Datacenter != "" {
result.Datacenter = b.Datacenter result.Datacenter = b.Datacenter
} }

View file

@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) {
t.Fatalf("bad: %#v", config) t.Fatalf("bad: %#v", config)
} }
// Expect bootstrap
input = `{"server": true, "expect": 3}`
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
if err != nil {
t.Fatalf("err: %s", err)
}
if !config.Server {
t.Fatalf("bad: %#v", config)
}
if config.Expect != 3 {
t.Fatalf("bad: %#v", config)
}
// DNS setup // DNS setup
input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}` input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}`
config, err = DecodeConfig(bytes.NewReader([]byte(input))) config, err = DecodeConfig(bytes.NewReader([]byte(input)))
@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) {
func TestMergeConfig(t *testing.T) { func TestMergeConfig(t *testing.T) {
a := &Config{ a := &Config{
Bootstrap: false, Bootstrap: false,
Expect: 0,
Datacenter: "dc1", Datacenter: "dc1",
DataDir: "/tmp/foo", DataDir: "/tmp/foo",
DNSRecursor: "127.0.0.1:1001", DNSRecursor: "127.0.0.1:1001",
@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) {
b := &Config{ b := &Config{
Bootstrap: true, Bootstrap: true,
Expect: 3,
Datacenter: "dc2", Datacenter: "dc2",
DataDir: "/tmp/bar", DataDir: "/tmp/bar",
DNSRecursor: "127.0.0.2:1001", DNSRecursor: "127.0.0.2:1001",

View file

@ -44,6 +44,11 @@ type Config struct {
// other nodes being present // other nodes being present
Bootstrap bool Bootstrap bool
// Expect mode is used to automatically bring up a collection of
// Consul servers. This can be used to automatically bring up a collection
// of nodes.
Expect int
// Datacenter is the datacenter this Consul server represents // Datacenter is the datacenter this Consul server represents
Datacenter string Datacenter string

View file

@ -1,13 +1,14 @@
package consul package consul
import ( import (
"net"
"strconv"
"time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf" "github.com/hashicorp/serf/serf"
"net"
"strconv"
"time"
) )
const ( const (
@ -368,6 +369,57 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error {
} }
} }
// Or, check for possibility that expect is not the same.
if parts.Expect != 0 {
members := s.serfLAN.Members()
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect != parts.Expect {
s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name)
return nil
}
}
}
// If we're not a bootstrapped server, we're expecting servers,
// and our raft index is zero, try to auto bootstrap.
if !s.config.Bootstrap && s.config.Expect != 0 {
if index, _ := s.raftStore.LastIndex(); index == 0 {
// do not do standard op and add peer... yet
count := 0
members := s.serfLAN.Members()
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect == parts.Expect {
count++
if count >= s.config.Expect {
break
}
}
}
if count >= s.config.Expect {
// we've met expected limit - add servers
s.config.RaftConfig.EnableSingleNode = false
for _, member := range members {
valid, p := isConsulServer(member)
if valid && member.Name != m.Name && p.Expect != parts.Expect {
addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port}
future := s.raft.AddPeer(addAddr)
if err := future.Error(); err != nil && err != raft.ErrKnownPeer {
s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err)
// hmm....
}
}
}
} else {
// not enough servers yet
return nil
}
}
}
// Attempt to add as a peer // Attempt to add as a peer
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port}
future := s.raft.AddPeer(addr) future := s.raft.AddPeer(addr)

View file

@ -4,9 +4,6 @@ import (
"crypto/tls" "crypto/tls"
"errors" "errors"
"fmt" "fmt"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-mdb"
"github.com/hashicorp/serf/serf"
"log" "log"
"net" "net"
"net/rpc" "net/rpc"
@ -17,6 +14,10 @@ import (
"strconv" "strconv"
"sync" "sync"
"time" "time"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-mdb"
"github.com/hashicorp/serf/serf"
) )
// These are the protocol versions that Consul can _understand_. These are // These are the protocol versions that Consul can _understand_. These are
@ -233,6 +234,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
if s.config.Bootstrap { if s.config.Bootstrap {
conf.Tags["bootstrap"] = "1" conf.Tags["bootstrap"] = "1"
} }
conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect)
conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.MemberlistConfig.LogOutput = s.config.LogOutput
conf.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput
conf.EventCh = ch conf.EventCh = ch
@ -252,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
// setupRaft is used to setup and initialize Raft // setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error { func (s *Server) setupRaft() error {
// If we are in bootstrap mode, enable a single node cluster // If we are in bootstrap or expect mode, enable a single node cluster
if s.config.Bootstrap { if s.config.Bootstrap || s.config.Expect != 0 {
s.config.RaftConfig.EnableSingleNode = true s.config.RaftConfig.EnableSingleNode = true
} }

View file

@ -3,12 +3,13 @@ package consul
import ( import (
"errors" "errors"
"fmt" "fmt"
"github.com/hashicorp/consul/testutil"
"io/ioutil" "io/ioutil"
"net" "net"
"os" "os"
"testing" "testing"
"time" "time"
"github.com/hashicorp/consul/testutil"
) )
var nextPort = 15000 var nextPort = 15000
@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se
return dir, server return dir, server
} }
func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) {
name := fmt.Sprintf("Node %d", getPort())
dir, config := testServerConfig(t, name)
config.Datacenter = dc
config.Bootstrap = false
config.Expect = expect
server, err := NewServer(config)
if err != nil {
t.Fatalf("err: %v", err)
}
return dir, server
}
func TestServer_StartStop(t *testing.T) { func TestServer_StartStop(t *testing.T) {
dir := tmpDir(t) dir := tmpDir(t)
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
@ -304,3 +318,145 @@ func TestServer_JoinLAN_TLS(t *testing.T) {
t.Fatalf("no peer established") t.Fatalf("no peer established")
}) })
} }
func TestServer_Expect(t *testing.T) {
// all test servers should be in expect=3 mode
dir1, s1 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
dir2, s2 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir2)
defer s2.Shutdown()
dir3, s3 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir3)
defer s3.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := s2.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p1 []net.Addr
var p2 []net.Addr
// should have no peers yet
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
// join the third node
if _, err := s3.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p3 []net.Addr
// should now have all three peers
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p3, _ = s3.raftPeers.Peers()
return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 3 peers: %v", err)
})
}
func TestServer_BadExpect(t *testing.T) {
// this one is in expect=3 mode
dir1, s1 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
// this one is in expect=2 mode
dir2, s2 := testServerDCExpect(t, "dc1", 2)
defer os.RemoveAll(dir2)
defer s2.Shutdown()
// and this one is in expect=3 mode
dir3, s3 := testServerDCExpect(t, "dc1", 3)
defer os.RemoveAll(dir3)
defer s3.Shutdown()
// Try to join
addr := fmt.Sprintf("127.0.0.1:%d",
s1.config.SerfLANConfig.MemberlistConfig.BindPort)
if _, err := s2.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p1 []net.Addr
var p2 []net.Addr
// should have no peers yet
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
// join the third node
if _, err := s3.JoinLAN([]string{addr}); err != nil {
t.Fatalf("err: %v", err)
}
var p3 []net.Addr
// should still have no peers (because s2 is in expect=2 mode)
testutil.WaitForResult(func() (bool, error) {
p1, _ = s1.raftPeers.Peers()
return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p2, _ = s2.raftPeers.Peers()
return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
testutil.WaitForResult(func() (bool, error) {
p3, _ = s3.raftPeers.Peers()
return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1))
}, func(err error) {
t.Fatalf("should have 0 peers: %v", err)
})
}

View file

@ -4,12 +4,13 @@ import (
crand "crypto/rand" crand "crypto/rand"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"github.com/hashicorp/serf/serf"
"net" "net"
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv" "strconv"
"github.com/hashicorp/serf/serf"
) )
/* /*
@ -26,6 +27,7 @@ type serverParts struct {
Datacenter string Datacenter string
Port int Port int
Bootstrap bool Bootstrap bool
Expect int
Version int Version int
Addr net.Addr Addr net.Addr
} }
@ -84,6 +86,12 @@ func isConsulServer(m serf.Member) (bool, *serverParts) {
datacenter := m.Tags["dc"] datacenter := m.Tags["dc"]
_, bootstrap := m.Tags["bootstrap"] _, bootstrap := m.Tags["bootstrap"]
expect_str := m.Tags["expect"]
expect, err := strconv.Atoi(expect_str)
if err != nil {
return false, nil
}
port_str := m.Tags["port"] port_str := m.Tags["port"]
port, err := strconv.Atoi(port_str) port, err := strconv.Atoi(port_str)
if err != nil { if err != nil {
@ -103,6 +111,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) {
Datacenter: datacenter, Datacenter: datacenter,
Port: port, Port: port,
Bootstrap: bootstrap, Bootstrap: bootstrap,
Expect: expect,
Addr: addr, Addr: addr,
Version: vsn, Version: vsn,
} }

View file

@ -1,10 +1,11 @@
package consul package consul
import ( import (
"github.com/hashicorp/serf/serf"
"net" "net"
"regexp" "regexp"
"testing" "testing"
"github.com/hashicorp/serf/serf"
) )
func TestStrContains(t *testing.T) { func TestStrContains(t *testing.T) {
@ -40,10 +41,11 @@ func TestIsConsulServer(t *testing.T) {
Name: "foo", Name: "foo",
Addr: net.IP([]byte{127, 0, 0, 1}), Addr: net.IP([]byte{127, 0, 0, 1}),
Tags: map[string]string{ Tags: map[string]string{
"role": "consul", "expect": "0",
"dc": "east-aws", "role": "consul",
"port": "10000", "dc": "east-aws",
"vsn": "1", "port": "10000",
"vsn": "1",
}, },
} }
valid, parts := isConsulServer(m) valid, parts := isConsulServer(m)
@ -56,6 +58,9 @@ func TestIsConsulServer(t *testing.T) {
if parts.Bootstrap { if parts.Bootstrap {
t.Fatalf("unexpected bootstrap") t.Fatalf("unexpected bootstrap")
} }
if parts.Expect != 0 {
t.Fatalf("bad: %v", parts.Expect)
}
m.Tags["bootstrap"] = "1" m.Tags["bootstrap"] = "1"
valid, parts = isConsulServer(m) valid, parts = isConsulServer(m)
if !valid || !parts.Bootstrap { if !valid || !parts.Bootstrap {
@ -67,6 +72,12 @@ func TestIsConsulServer(t *testing.T) {
if parts.Version != 1 { if parts.Version != 1 {
t.Fatalf("bad: %v", parts) t.Fatalf("bad: %v", parts)
} }
m.Tags["expect"] = "3"
delete(m.Tags, "bootstrap")
valid, parts = isConsulServer(m)
if !valid || parts.Expect != 3 {
t.Fatalf("bad: %v", parts.Expect)
}
} }
func TestIsConsulNode(t *testing.T) { func TestIsConsulNode(t *testing.T) {