From 31c392813caf55717600852dfaae8aecc995dc34 Mon Sep 17 00:00:00 2001
From: Robert Xu
Date: Mon, 16 Jun 2014 17:36:12 -0400
Subject: [PATCH 01/43] Add expect bootstrap '-expect=n' mode.

This allows us to automatically bootstrap a cluster once 'n' server
nodes have joined. All servers must be started with the same 'n', or
they will refuse to join the cluster; no server joins the peer set
until 'n' server nodes are present. If the raft commit index is not
empty, '-expect=n' does nothing, since the cluster is considered
already bootstrapped.

Signed-off-by: Robert Xu
---
 command/agent/agent.go       |  10 ++-
 command/agent/command.go     |  35 +++++++-
 command/agent/config.go      |  13 ++-
 command/agent/config_test.go |  17 ++++
 consul/config.go             |   5 ++
 consul/leader.go             |  58 ++++++++++++-
 consul/server.go             |  12 +--
 consul/server_test.go        | 158 ++++++++++++++++++++++++++++++++++-
 consul/util.go               |  11 ++-
 consul/util_test.go          |  21 +++--
 10 files changed, 316 insertions(+), 24 deletions(-)

diff --git a/command/agent/agent.go b/command/agent/agent.go
index 62784f8d6..4ee1a6f8f 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -2,15 +2,16 @@ package agent

 import (
 	"fmt"
-	"github.com/hashicorp/consul/consul"
-	"github.com/hashicorp/consul/consul/structs"
-	"github.com/hashicorp/serf/serf"
 	"io"
 	"log"
 	"net"
 	"os"
 	"strconv"
 	"sync"
+
+	"github.com/hashicorp/consul/consul"
+	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/serf/serf"
 )

 /*
@@ -171,6 +172,9 @@ func (a *Agent) consulConfig() *consul.Config {
 	if a.config.Bootstrap {
 		base.Bootstrap = true
 	}
+	if a.config.Expect != 0 {
+		base.Expect = a.config.Expect
+	}
 	if a.config.Protocol > 0 {
 		base.ProtocolVersion = uint8(a.config.Protocol)
 	}
diff --git a/command/agent/command.go b/command/agent/command.go
index 2c9c5e271..cdc833bc4 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -3,10 +3,6 @@ package agent
 import (
 	"flag"
 	"fmt"
-	"github.com/armon/go-metrics"
-	"github.com/hashicorp/go-syslog"
-	"github.com/hashicorp/logutils"
-	"github.com/mitchellh/cli"
 	"io"
 	"net"
 	"os"
@@ -16,6 +12,11 @@ import (
 	"strings"
 	"syscall"
 	"time"
+
+	"github.com/armon/go-metrics"
+	"github.com/hashicorp/go-syslog"
+	"github.com/hashicorp/logutils"
+	"github.com/mitchellh/cli"
 )

 // gracefulTimeout controls how long we wait before forcefully terminating
@@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config {
 	cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server")
 	cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode")
+	cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode")
 	cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "",
 		"address to bind client listeners to (DNS, HTTP, RPC)")
 	cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to")
@@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config {
 		return nil
 	}

+	// Expect can only work when acting as a server
+	if config.Expect != 0 && !config.Server {
+		c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled")
+		return nil
+	}
+
+	// Expect & Bootstrap are mutually exclusive
+	if config.Expect != 0 && config.Bootstrap {
+		c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive")
+		return nil
+	}
+
+	// Warn if we are in expect mode
+	if config.Expect != 0 {
+		if config.Expect == 1 {
+			// just use bootstrap mode
+			c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.")
+			config.Expect = 0
+			config.Bootstrap = true
+		} else {
+			c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect))
+		}
+	}
+
 	// Warn if we are in bootstrap mode
 	if config.Bootstrap {
 		c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary")
@@ -524,6 +550,7 @@ Options:
                            order.
  -data-dir=path            Path to a data directory to store agent state
  -dc=east-aws              Datacenter of the agent
+ -expect=0                 Sets server to expect bootstrap mode.
  -join=1.2.3.4             Address of an agent to join at start time.
                            Can be specified multiple times.
  -log-level=info           Log level of the agent.
diff --git a/command/agent/config.go b/command/agent/config.go
index a5a2bc452..c3631429a 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -4,8 +4,6 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
-	"github.com/hashicorp/consul/consul"
-	"github.com/mitchellh/mapstructure"
 	"io"
 	"net"
 	"os"
@@ -13,6 +11,9 @@ import (
 	"sort"
 	"strings"
 	"time"
+
+	"github.com/hashicorp/consul/consul"
+	"github.com/mitchellh/mapstructure"
 )

 // Ports is used to simplify the configuration by
@@ -64,6 +65,10 @@ type Config struct {
 	// permits that node to elect itself leader
 	Bootstrap bool `mapstructure:"bootstrap"`

+	// Expect tries to automatically bootstrap the Consul cluster,
+	// by witholding peers until enough servers join.
+	Expect int `mapstructure:"expect"`
+
 	// Server controls if this agent acts like a Consul server,
 	// or merely as a client. Servers have more state, take part
 	// in leader election, etc.
@@ -219,6 +224,7 @@ type dirEnts []os.FileInfo
 func DefaultConfig() *Config {
 	return &Config{
 		Bootstrap:  false,
+		Expect:     0,
 		Server:     false,
 		Datacenter: consul.DefaultDC,
 		Domain:     "consul.",
@@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config {
 	if b.Bootstrap {
 		result.Bootstrap = true
 	}
+	if b.Expect != 0 {
+		result.Expect = b.Expect
+	}
 	if b.Datacenter != "" {
 		result.Datacenter = b.Datacenter
 	}
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index b1c83d479..0225630d0 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) {
 		t.Fatalf("bad: %#v", config)
 	}

+	// Expect bootstrap
+	input = `{"server": true, "expect": 3}`
+	config, err = DecodeConfig(bytes.NewReader([]byte(input)))
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	if !config.Server {
+		t.Fatalf("bad: %#v", config)
+	}
+
+	if config.Expect != 3 {
+		t.Fatalf("bad: %#v", config)
+	}
+
 	// DNS setup
 	input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}`
 	config, err = DecodeConfig(bytes.NewReader([]byte(input)))
@@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) {
 func TestMergeConfig(t *testing.T) {
 	a := &Config{
 		Bootstrap:   false,
+		Expect:      0,
 		Datacenter:  "dc1",
 		DataDir:     "/tmp/foo",
 		DNSRecursor: "127.0.0.1:1001",
@@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) {

 	b := &Config{
 		Bootstrap:   true,
+		Expect:      3,
 		Datacenter:  "dc2",
 		DataDir:     "/tmp/bar",
 		DNSRecursor: "127.0.0.2:1001",
diff --git a/consul/config.go b/consul/config.go
index 6000177a8..ae6c48282 100644
--- a/consul/config.go
+++ b/consul/config.go
@@ -44,6 +44,11 @@ type Config struct {
 	// other nodes being present
 	Bootstrap bool

+	// Expect mode is used to automatically bring up a collection of
+	// Consul servers. This can be used to automatically bring up a collection
+	// of nodes.
+ Expect int + // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/leader.go b/consul/leader.go index d09f11185..8cbc84273 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -1,13 +1,14 @@ package consul import ( + "net" + "strconv" + "time" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" - "net" - "strconv" - "time" ) const ( @@ -368,6 +369,57 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error { } } + // Or, check for possibility that expect is not the same. + if parts.Expect != 0 { + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name) + return nil + } + } + } + + // If we're not a bootstrapped server, we're expecting servers, + // and our raft index is zero, try to auto bootstrap. + if !s.config.Bootstrap && s.config.Expect != 0 { + if index, _ := s.raftStore.LastIndex(); index == 0 { + // do not do standard op and add peer... yet + count := 0 + members := s.serfLAN.Members() + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect == parts.Expect { + count++ + if count >= s.config.Expect { + break + } + } + } + + if count >= s.config.Expect { + // we've met expected limit - add servers + s.config.RaftConfig.EnableSingleNode = false + for _, member := range members { + valid, p := isConsulServer(member) + if valid && member.Name != m.Name && p.Expect != parts.Expect { + addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port} + future := s.raft.AddPeer(addAddr) + + if err := future.Error(); err != nil && err != raft.ErrKnownPeer { + s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err) + // hmm.... + } + } + } + } else { + // not enough servers yet + return nil + } + } + } + // Attempt to add as a peer var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} future := s.raft.AddPeer(addr) diff --git a/consul/server.go b/consul/server.go index e7dd195f3..91eafb19b 100644 --- a/consul/server.go +++ b/consul/server.go @@ -4,9 +4,6 @@ import ( "crypto/tls" "errors" "fmt" - "github.com/hashicorp/raft" - "github.com/hashicorp/raft-mdb" - "github.com/hashicorp/serf/serf" "log" "net" "net/rpc" @@ -17,6 +14,10 @@ import ( "strconv" "sync" "time" + + "github.com/hashicorp/raft" + "github.com/hashicorp/raft-mdb" + "github.com/hashicorp/serf/serf" ) // These are the protocol versions that Consul can _understand_. 
These are @@ -233,6 +234,7 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch @@ -252,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w // setupRaft is used to setup and initialize Raft func (s *Server) setupRaft() error { - // If we are in bootstrap mode, enable a single node cluster - if s.config.Bootstrap { + // If we are in bootstrap or expect mode, enable a single node cluster + if s.config.Bootstrap || s.config.Expect != 0 { s.config.RaftConfig.EnableSingleNode = true } diff --git a/consul/server_test.go b/consul/server_test.go index b8edc6ef4..a00f7f34a 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -3,12 +3,13 @@ package consul import ( "errors" "fmt" - "github.com/hashicorp/consul/testutil" "io/ioutil" "net" "os" "testing" "time" + + "github.com/hashicorp/consul/testutil" ) var nextPort = 15000 @@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se return dir, server } +func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { + name := fmt.Sprintf("Node %d", getPort()) + dir, config := testServerConfig(t, name) + config.Datacenter = dc + config.Bootstrap = false + config.Expect = expect + server, err := NewServer(config) + if err != nil { + t.Fatalf("err: %v", err) + } + return dir, server +} + func TestServer_StartStop(t *testing.T) { dir := tmpDir(t) defer os.RemoveAll(dir) @@ -304,3 +318,145 @@ func TestServer_JoinLAN_TLS(t *testing.T) { t.Fatalf("no peer established") }) } + +func TestServer_Expect(t *testing.T) { + // all test servers should be in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should now have all three peers + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 
3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + +} + +func TestServer_BadExpect(t *testing.T) { + // this one is in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + // this one is in expect=2 mode + dir2, s2 := testServerDCExpect(t, "dc1", 2) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + // and this one is in expect=3 mode + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should still have no peers (because s2 is in expect=2 mode) + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + +} diff --git a/consul/util.go b/consul/util.go index 402ecee73..dc5b6ef2a 100644 --- a/consul/util.go +++ b/consul/util.go @@ -4,12 +4,13 @@ import ( crand "crypto/rand" "encoding/binary" "fmt" - "github.com/hashicorp/serf/serf" "net" "os" "path/filepath" "runtime" "strconv" + + "github.com/hashicorp/serf/serf" ) /* @@ -26,6 +27,7 @@ type serverParts struct { Datacenter string Port int Bootstrap bool + Expect int Version int Addr net.Addr } @@ -84,6 +86,12 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] + expect_str := m.Tags["expect"] + expect, err := strconv.Atoi(expect_str) + if err != nil { + return false, nil + } + port_str := m.Tags["port"] port, err := strconv.Atoi(port_str) if err != nil { @@ -103,6 +111,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { Datacenter: datacenter, Port: port, Bootstrap: bootstrap, + Expect: expect, Addr: addr, Version: vsn, } diff --git a/consul/util_test.go b/consul/util_test.go index 65e5e99ed..e360f523c 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -1,10 +1,11 @@ package consul import ( - "github.com/hashicorp/serf/serf" "net" "regexp" "testing" + + "github.com/hashicorp/serf/serf" ) func TestStrContains(t *testing.T) { @@ -40,10 +41,11 @@ func TestIsConsulServer(t *testing.T) { Name: "foo", Addr: net.IP([]byte{127, 0, 0, 1}), Tags: map[string]string{ - "role": "consul", - "dc": "east-aws", - "port": "10000", - 
"vsn": "1", + "expect": "0", + "role": "consul", + "dc": "east-aws", + "port": "10000", + "vsn": "1", }, } valid, parts := isConsulServer(m) @@ -56,6 +58,9 @@ func TestIsConsulServer(t *testing.T) { if parts.Bootstrap { t.Fatalf("unexpected bootstrap") } + if parts.Expect != 0 { + t.Fatalf("bad: %v", parts.Expect) + } m.Tags["bootstrap"] = "1" valid, parts = isConsulServer(m) if !valid || !parts.Bootstrap { @@ -67,6 +72,12 @@ func TestIsConsulServer(t *testing.T) { if parts.Version != 1 { t.Fatalf("bad: %v", parts) } + m.Tags["expect"] = "3" + delete(m.Tags, "bootstrap") + valid, parts = isConsulServer(m) + if !valid || parts.Expect != 3 { + t.Fatalf("bad: %v", parts.Expect) + } } func TestIsConsulNode(t *testing.T) { From 4db6d83f44f66273b08ff3f5d6fb34b7021266b4 Mon Sep 17 00:00:00 2001 From: Phillip Markert Date: Tue, 17 Jun 2014 09:11:13 -0400 Subject: [PATCH 02/43] website: Fixed semantic meaning of upstream in DNS guide --- website/source/docs/guides/dns-cache.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/guides/dns-cache.html.markdown b/website/source/docs/guides/dns-cache.html.markdown index 37d79fc47..007d3ed71 100644 --- a/website/source/docs/guides/dns-cache.html.markdown +++ b/website/source/docs/guides/dns-cache.html.markdown @@ -41,7 +41,7 @@ of the leader. ## TTL Values -TTL values can be set to allow DNS results to be cached upstream +TTL values can be set to allow DNS results to be cached downstream of Consul which can be reduce the number of lookups and to amortize the latency of doing a DNS lookup. By default, all TTLs are zero, preventing any caching. From be0554778d3f753e0a7def898d6e70133117b111 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 17 Jun 2014 16:48:19 -0700 Subject: [PATCH 03/43] agent: Fix issues with re-registration. Fixes #216 --- command/agent/agent.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 62784f8d6..65654ee04 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -393,7 +393,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkType *CheckType) err ServiceName: service.Service, } if err := a.AddCheck(check, chkType); err != nil { - a.state.RemoveService(service.ID) return err } } @@ -429,8 +428,8 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType) error { // Check if already registered if chkType != nil { if chkType.IsTTL() { - if _, ok := a.checkTTLs[check.CheckID]; ok { - return fmt.Errorf("CheckID is already registered") + if existing, ok := a.checkTTLs[check.CheckID]; ok { + existing.Stop() } ttl := &CheckTTL{ @@ -443,8 +442,8 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType) error { a.checkTTLs[check.CheckID] = ttl } else { - if _, ok := a.checkMonitors[check.CheckID]; ok { - return fmt.Errorf("CheckID is already registered") + if existing, ok := a.checkMonitors[check.CheckID]; ok { + existing.Stop() } if chkType.Interval < MinInterval { a.logger.Println(fmt.Sprintf("[WARN] agent: check '%s' has interval below minimum of %v", From a2fea2ce5571ab38991fae7f990ef72735b95f5d Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Wed, 18 Jun 2014 12:03:30 -0400 Subject: [PATCH 04/43] Utilise new raft.SetPeers() method, move expect logic to leader.go. This way, we don't use EnableSingleMode, nor cause chaos adding peers. 
Signed-off-by: Robert Xu --- command/agent/command.go | 2 +- consul/leader.go | 51 ---------------------------------------- consul/serf.go | 39 +++++++++++++++++++++++++++++- consul/server.go | 4 ++-- 4 files changed, 41 insertions(+), 55 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index cdc833bc4..25e04dc44 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -137,7 +137,7 @@ func (c *Command) readConfig() *Config { // Expect & Bootstrap are mutually exclusive if config.Expect != 0 && config.Bootstrap { - c.Ui.Error("Expect mode and Bootstrap mode are mutually exclusive") + c.Ui.Error("Bootstrap cannot be provided with an expected server count") return nil } diff --git a/consul/leader.go b/consul/leader.go index 8cbc84273..b63f7bbe8 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -369,57 +369,6 @@ func (s *Server) joinConsulServer(m serf.Member, parts *serverParts) error { } } - // Or, check for possibility that expect is not the same. - if parts.Expect != 0 { - members := s.serfLAN.Members() - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect != parts.Expect { - s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, not adding Raft peer.", m.Name, member.Name) - return nil - } - } - } - - // If we're not a bootstrapped server, we're expecting servers, - // and our raft index is zero, try to auto bootstrap. - if !s.config.Bootstrap && s.config.Expect != 0 { - if index, _ := s.raftStore.LastIndex(); index == 0 { - // do not do standard op and add peer... yet - count := 0 - members := s.serfLAN.Members() - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect == parts.Expect { - count++ - if count >= s.config.Expect { - break - } - } - } - - if count >= s.config.Expect { - // we've met expected limit - add servers - s.config.RaftConfig.EnableSingleNode = false - for _, member := range members { - valid, p := isConsulServer(member) - if valid && member.Name != m.Name && p.Expect != parts.Expect { - addAddr := &net.TCPAddr{IP: member.Addr, Port: p.Port} - future := s.raft.AddPeer(addAddr) - - if err := future.Error(); err != nil && err != raft.ErrKnownPeer { - s.logger.Printf("[ERR] consul: failed to add raft peer: %v", err) - // hmm.... - } - } - } - } else { - // not enough servers yet - return nil - } - } - } - // Attempt to add as a peer var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port} future := s.raft.AddPeer(addr) diff --git a/consul/serf.go b/consul/serf.go index 11a48ee47..8a34cc9b3 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -1,8 +1,10 @@ package consul import ( - "github.com/hashicorp/serf/serf" + "net" "strings" + + "github.com/hashicorp/serf/serf" ) const ( @@ -149,6 +151,41 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { s.localConsuls[parts.Addr.String()] = parts s.localLock.Unlock() } + + // If we're still expecting, and they are too, check servers. + if s.config.Expect != 0 && parts.Expect != 0 { + index, err := s.raftStore.LastIndex() + if err == nil && index == 0 { + members := s.serfLAN.Members() + addrs := make([]net.Addr, 0) + for _, member := range members { + valid, p := isConsulServer(member) + if valid { + if p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. 
All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) + return + } else { + addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) + } + } + } + + if len(addrs) >= s.config.Expect { + // we have enough nodes, set peers. + + future := s.raft.SetPeers(addrs) + + if err := future.Error(); err != nil { + s.logger.Printf("[ERR] consul: failed to leave expect mode and set peers: %v", err) + } else { + // we've left expect mode, don't enter this again + s.config.Expect = 0 + } + } + } else if err != nil { + s.logger.Printf("[ERR] consul: error retrieving index: %v", err) + } + } } } diff --git a/consul/server.go b/consul/server.go index 91eafb19b..d431ebd1d 100644 --- a/consul/server.go +++ b/consul/server.go @@ -254,8 +254,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w // setupRaft is used to setup and initialize Raft func (s *Server) setupRaft() error { - // If we are in bootstrap or expect mode, enable a single node cluster - if s.config.Bootstrap || s.config.Expect != 0 { + // If we are in bootstrap mode, enable a single node cluster + if s.config.Bootstrap { s.config.RaftConfig.EnableSingleNode = true } From 93034084ce21abd43e33e3cc2089c0bc7dab422b Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 10:32:19 -0700 Subject: [PATCH 05/43] agent: Fixing missing copy of RejoinAfterLeave flag. #110 --- command/agent/agent.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command/agent/agent.go b/command/agent/agent.go index 65654ee04..472adf53d 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -171,6 +171,9 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.Bootstrap { base.Bootstrap = true } + if a.config.RejoinAfterLeave { + base.RejoinAfterLeave = true + } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) } From df637c707052b498d9352d41fdbe42265b0d7f49 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 10:35:42 -0700 Subject: [PATCH 06/43] CHANGELOG updates --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 640c94104..8b6834119 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## 0.3.1 (Unreleased) + +BUG FIXES: + + * Fixed issue with service re-registration [GH-216] + * Fixed handling of `-rejoin` flag + +IMPROVEMENTS: + + * Improved handling of Serf snapshot data + * Increase reliability of failure detector + + ## 0.3.0 (June 13, 2014) FEATURES: From fff6546c75fc36f2d01b96f28794fe926ff97600 Mon Sep 17 00:00:00 2001 From: Robert Xu Date: Wed, 18 Jun 2014 18:47:05 -0400 Subject: [PATCH 07/43] Minor cleanup to logic and testsuite. Signed-off-by: Robert Xu --- consul/serf.go | 2 +- consul/server.go | 4 +++- consul/server_test.go | 14 ++++++++------ consul/util.go | 12 ++++++++---- consul/util_test.go | 9 ++++----- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/consul/serf.go b/consul/serf.go index 8a34cc9b3..c23c10c81 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -160,7 +160,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { addrs := make([]net.Addr, 0) for _, member := range members { valid, p := isConsulServer(member) - if valid { + if valid && p.Datacenter == parts.Datacenter { if p.Expect != parts.Expect { s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. 
All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) return diff --git a/consul/server.go b/consul/server.go index d431ebd1d..d126e2343 100644 --- a/consul/server.go +++ b/consul/server.go @@ -234,7 +234,9 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } - conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + if s.config.Expect != 0 { + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch diff --git a/consul/server_test.go b/consul/server_test.go index a00f7f34a..109f5081b 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -353,7 +353,7 @@ func TestServer_Expect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) @@ -375,18 +375,20 @@ func TestServer_Expect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 3, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 3 peers: %v", err) }) testutil.WaitForResult(func() (bool, error) { p3, _ = s3.raftPeers.Peers() - return len(p3) == 3, errors.New(fmt.Sprintf("%v", p1)) + return len(p3) == 3, errors.New(fmt.Sprintf("%v", p3)) }, func(err error) { t.Fatalf("should have 3 peers: %v", err) }) + // check if there is one leader now + testutil.WaitForLeader(t, s1.RPC, "dc1") } func TestServer_BadExpect(t *testing.T) { @@ -425,7 +427,7 @@ func TestServer_BadExpect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) @@ -447,14 +449,14 @@ func TestServer_BadExpect(t *testing.T) { testutil.WaitForResult(func() (bool, error) { p2, _ = s2.raftPeers.Peers() - return len(p2) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) testutil.WaitForResult(func() (bool, error) { p3, _ = s3.raftPeers.Peers() - return len(p3) == 0, errors.New(fmt.Sprintf("%v", p1)) + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p3)) }, func(err error) { t.Fatalf("should have 0 peers: %v", err) }) diff --git a/consul/util.go b/consul/util.go index dc5b6ef2a..00815ea10 100644 --- a/consul/util.go +++ b/consul/util.go @@ -86,10 +86,14 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] - expect_str := m.Tags["expect"] - expect, err := strconv.Atoi(expect_str) - if err != nil { - return false, nil + expect := 0 + expect_str, ok := m.Tags["expect"] + var err error + if ok { + expect, err = strconv.Atoi(expect_str) + if err != nil { + return false, nil + } } port_str := m.Tags["port"] diff --git a/consul/util_test.go b/consul/util_test.go index e360f523c..107146b52 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -41,11 +41,10 @@ func TestIsConsulServer(t *testing.T) { Name: "foo", Addr: net.IP([]byte{127, 0, 0, 1}), Tags: map[string]string{ - "expect": "0", - 
"role": "consul", - "dc": "east-aws", - "port": "10000", - "vsn": "1", + "role": "consul", + "dc": "east-aws", + "port": "10000", + "vsn": "1", }, } valid, parts := isConsulServer(m) From 406d19f483654f7d598773bb5d96cafa21a4bbec Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 18 Jun 2014 16:15:28 -0700 Subject: [PATCH 08/43] consul: Minor cleanups --- consul/serf.go | 91 +++++++++++++++++++++++++++---------------- consul/server_test.go | 2 +- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/consul/serf.go b/consul/serf.go index c23c10c81..e31abef1a 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -146,49 +146,72 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { s.remoteLock.Unlock() // Add to the local list as well - if !wan { + if !wan && parts.Datacenter == s.config.Datacenter { s.localLock.Lock() s.localConsuls[parts.Addr.String()] = parts s.localLock.Unlock() } - // If we're still expecting, and they are too, check servers. - if s.config.Expect != 0 && parts.Expect != 0 { - index, err := s.raftStore.LastIndex() - if err == nil && index == 0 { - members := s.serfLAN.Members() - addrs := make([]net.Addr, 0) - for _, member := range members { - valid, p := isConsulServer(member) - if valid && p.Datacenter == parts.Datacenter { - if p.Expect != parts.Expect { - s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) - return - } else { - addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) - } - } - } - - if len(addrs) >= s.config.Expect { - // we have enough nodes, set peers. - - future := s.raft.SetPeers(addrs) - - if err := future.Error(); err != nil { - s.logger.Printf("[ERR] consul: failed to leave expect mode and set peers: %v", err) - } else { - // we've left expect mode, don't enter this again - s.config.Expect = 0 - } - } - } else if err != nil { - s.logger.Printf("[ERR] consul: error retrieving index: %v", err) - } + // If we still expecting to bootstrap, may need to handle this + if s.config.Expect != 0 { + s.maybeBootstrap() } } } +// maybeBootsrap is used to handle bootstrapping when a new consul server joins +func (s *Server) maybeBootstrap() { + index, err := s.raftStore.LastIndex() + if err != nil { + s.logger.Printf("[ERR] consul: failed to read last raft index: %v", err) + return + } + + // Bootstrap can only be done if there are no committed logs, + // remove our expectations of bootstrapping + if index != 0 { + s.config.Expect = 0 + return + } + + // Scan for all the known servers + members := s.serfLAN.Members() + addrs := make([]net.Addr, 0) + for _, member := range members { + valid, p := isConsulServer(member) + if !valid { + continue + } + if p.Datacenter != s.config.Datacenter { + s.logger.Printf("[ERR] consul: Member %v has a conflicting datacenter, ignoring", member) + continue + } + if p.Expect != 0 && p.Expect != s.config.Expect { + s.logger.Printf("[ERR] consul: Member %v has a conflicting expect value. All nodes should expect the same number.", member) + return + } + if p.Bootstrap { + s.logger.Printf("[ERR] consul: Member %v has bootstrap mode. 
+			return
+		}
+		addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port})
+	}
+
+	// Skip if we haven't met the minimum expect count
+	if len(addrs) < s.config.Expect {
+		return
+	}
+
+	// Update the peer set
+	s.logger.Printf("[INFO] consul: Attempting bootstrap with nodes: %v", addrs)
+	if err := s.raft.SetPeers(addrs).Error(); err != nil {
+		s.logger.Printf("[ERR] consul: failed to bootstrap peers: %v", err)
+	}
+
+	// Bootstrapping comlete, don't enter this again
+	s.config.Expect = 0
+}
+
 // nodeFailed is used to handle fail events on both the serf clustes
 func (s *Server) nodeFailed(me serf.MemberEvent, wan bool) {
 	for _, m := range me.Members {
diff --git a/consul/server_test.go b/consul/server_test.go
index 109f5081b..9ad01b4ae 100644
--- a/consul/server_test.go
+++ b/consul/server_test.go
@@ -329,7 +329,7 @@ func TestServer_Expect(t *testing.T) {
 	defer os.RemoveAll(dir2)
 	defer s2.Shutdown()

-	dir3, s3 := testServerDCExpect(t, "dc1", 3)
+	dir3, s3 := testServerDCExpect(t, "dc1", 0)
 	defer os.RemoveAll(dir3)
 	defer s3.Shutdown()

From 6e9af77bff7b53fe41a8ca4dedd711690fe9df92 Mon Sep 17 00:00:00 2001
From: "Philip K. Warren"
Date: Thu, 19 Jun 2014 13:53:43 -0500
Subject: [PATCH 09/43] Fix a small typo in RPC docs.

---
 website/source/docs/agent/rpc.html.markdown | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/source/docs/agent/rpc.html.markdown b/website/source/docs/agent/rpc.html.markdown
index dcc3583f5..293040a99 100644
--- a/website/source/docs/agent/rpc.html.markdown
+++ b/website/source/docs/agent/rpc.html.markdown
@@ -199,7 +199,7 @@ There is no request body, or special response body.

 ### stats

-The stats command is used to provide operator information for debugginer.
+The stats command is used to provide operator information for debugging.
 There is no request body, the response body looks like:

 ```

From a721c818de8a9d674270cd27c3f4cf346b4a3a5e Mon Sep 17 00:00:00 2001
From: Jack Pearkes
Date: Thu, 19 Jun 2014 16:59:57 -0400
Subject: [PATCH 10/43] ui: display session name on list of sessions under locked key

---
 ui/index.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/index.html b/ui/index.html
index ce40de71c..b8f35f011 100644
--- a/ui/index.html
+++ b/ui/index.html
@@ -247,9 +247,9 @@
       {{#link-to 'nodes.show' model.session.Node tagName="div" href=false class="list-group-item list-condensed-link" }}
-          {{session.Node}}
+          {{ sessionName session }}
-          {{session.ID}}
+          {{session.Node}}
{{/link-to}} From 80b86c9ee95180ac9d781e132b4f62873d77de12 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Thu, 19 Jun 2014 17:08:48 -0700 Subject: [PATCH 11/43] Rename Expect to BootstrapExpect. Fixes #223. --- command/agent/agent.go | 4 ++-- command/agent/command.go | 21 +++++++++------------ command/agent/config.go | 24 ++++++++++++------------ command/agent/config_test.go | 16 ++++++++-------- consul/config.go | 4 ++-- consul/serf.go | 10 +++++----- consul/server.go | 4 ++-- consul/server_test.go | 2 +- 8 files changed, 41 insertions(+), 44 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 75ff489ec..0d8cecfdf 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -175,8 +175,8 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.RejoinAfterLeave { base.RejoinAfterLeave = true } - if a.config.Expect != 0 { - base.Expect = a.config.Expect + if a.config.BootstrapExpect != 0 { + base.BootstrapExpect = a.config.BootstrapExpect } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) diff --git a/command/agent/command.go b/command/agent/command.go index 25e04dc44..f1f7b0d0b 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -63,7 +63,7 @@ func (c *Command) readConfig() *Config { cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") - cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode") + cmdFlags.IntVar(&cmdConfig.BootstrapExpect, "bootstrap-expect", 0, "enable automatic bootstrap via expect mode") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") @@ -130,27 +130,24 @@ func (c *Command) readConfig() *Config { } // Expect can only work when acting as a server - if config.Expect != 0 && !config.Server { + if config.BootstrapExpect != 0 && !config.Server { c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled") return nil } // Expect & Bootstrap are mutually exclusive - if config.Expect != 0 && config.Bootstrap { + if config.BootstrapExpect != 0 && config.Bootstrap { c.Ui.Error("Bootstrap cannot be provided with an expected server count") return nil } // Warn if we are in expect mode - if config.Expect != 0 { - if config.Expect == 1 { - // just use bootstrap mode - c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.") - config.Expect = 0 - config.Bootstrap = true - } else { - c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect)) - } + if config.BootstrapExpect == 1 { + c.Ui.Error("WARNING: BootstrapExpect Mode is specified as 1; this is the same as Bootstrap mode.") + config.BootstrapExpect = 0 + config.Bootstrap = true + } else if config.BootstrapExpect > 0 { + c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, expecting %d servers", config.BootstrapExpect)) } // Warn if we are in bootstrap mode diff --git a/command/agent/config.go b/command/agent/config.go index c3631429a..f08d545b1 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -65,9 +65,9 @@ type Config struct { // permits that node to elect itself leader Bootstrap bool `mapstructure:"bootstrap"` - // Expect tries to automatically bootstrap the Consul cluster, + // BootstrapExpect tries to automatically bootstrap the 
Consul cluster, // by witholding peers until enough servers join. - Expect int `mapstructure:"expect"` + BootstrapExpect int `mapstructure:"bootstrap_expect"` // Server controls if this agent acts like a Consul server, // or merely as a client. Servers have more state, take part @@ -223,14 +223,14 @@ type dirEnts []os.FileInfo // DefaultConfig is used to return a sane default configuration func DefaultConfig() *Config { return &Config{ - Bootstrap: false, - Expect: 0, - Server: false, - Datacenter: consul.DefaultDC, - Domain: "consul.", - LogLevel: "INFO", - ClientAddr: "127.0.0.1", - BindAddr: "0.0.0.0", + Bootstrap: false, + BootstrapExpect: 0, + Server: false, + Datacenter: consul.DefaultDC, + Domain: "consul.", + LogLevel: "INFO", + ClientAddr: "127.0.0.1", + BindAddr: "0.0.0.0", Ports: PortConfig{ DNS: 8600, HTTP: 8500, @@ -455,8 +455,8 @@ func MergeConfig(a, b *Config) *Config { if b.Bootstrap { result.Bootstrap = true } - if b.Expect != 0 { - result.Expect = b.Expect + if b.BootstrapExpect != 0 { + result.BootstrapExpect = b.BootstrapExpect } if b.Datacenter != "" { result.Datacenter = b.Datacenter diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 0225630d0..0c6db15e1 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -94,7 +94,7 @@ func TestDecodeConfig(t *testing.T) { } // Expect bootstrap - input = `{"server": true, "expect": 3}` + input = `{"server": true, "bootstrap_expect": 3}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) if err != nil { t.Fatalf("err: %s", err) @@ -104,7 +104,7 @@ func TestDecodeConfig(t *testing.T) { t.Fatalf("bad: %#v", config) } - if config.Expect != 3 { + if config.BootstrapExpect != 3 { t.Fatalf("bad: %#v", config) } @@ -441,7 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) { func TestMergeConfig(t *testing.T) { a := &Config{ Bootstrap: false, - Expect: 0, + BootstrapExpect: 0, Datacenter: "dc1", DataDir: "/tmp/foo", DNSRecursor: "127.0.0.1:1001", @@ -459,11 +459,11 @@ func TestMergeConfig(t *testing.T) { } b := &Config{ - Bootstrap: true, - Expect: 3, - Datacenter: "dc2", - DataDir: "/tmp/bar", - DNSRecursor: "127.0.0.2:1001", + Bootstrap: true, + BootstrapExpect: 3, + Datacenter: "dc2", + DataDir: "/tmp/bar", + DNSRecursor: "127.0.0.2:1001", DNSConfig: DNSConfig{ NodeTTL: 10 * time.Second, ServiceTTL: map[string]time.Duration{ diff --git a/consul/config.go b/consul/config.go index ae6c48282..fe4bf6001 100644 --- a/consul/config.go +++ b/consul/config.go @@ -44,10 +44,10 @@ type Config struct { // other nodes being present Bootstrap bool - // Expect mode is used to automatically bring up a collection of + // BootstrapExpect mode is used to automatically bring up a collection of // Consul servers. This can be used to automatically bring up a collection // of nodes. 
- Expect int + BootstrapExpect int // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/serf.go b/consul/serf.go index e31abef1a..37aae2725 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -153,7 +153,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { } // If we still expecting to bootstrap, may need to handle this - if s.config.Expect != 0 { + if s.config.BootstrapExpect != 0 { s.maybeBootstrap() } } @@ -170,7 +170,7 @@ func (s *Server) maybeBootstrap() { // Bootstrap can only be done if there are no committed logs, // remove our expectations of bootstrapping if index != 0 { - s.config.Expect = 0 + s.config.BootstrapExpect = 0 return } @@ -186,7 +186,7 @@ func (s *Server) maybeBootstrap() { s.logger.Printf("[ERR] consul: Member %v has a conflicting datacenter, ignoring", member) continue } - if p.Expect != 0 && p.Expect != s.config.Expect { + if p.Expect != 0 && p.Expect != s.config.BootstrapExpect { s.logger.Printf("[ERR] consul: Member %v has a conflicting expect value. All nodes should expect the same number.", member) return } @@ -198,7 +198,7 @@ func (s *Server) maybeBootstrap() { } // Skip if we haven't met the minimum expect count - if len(addrs) < s.config.Expect { + if len(addrs) < s.config.BootstrapExpect { return } @@ -209,7 +209,7 @@ func (s *Server) maybeBootstrap() { } // Bootstrapping comlete, don't enter this again - s.config.Expect = 0 + s.config.BootstrapExpect = 0 } // nodeFailed is used to handle fail events on both the serf clustes diff --git a/consul/server.go b/consul/server.go index d126e2343..5f2a7635e 100644 --- a/consul/server.go +++ b/consul/server.go @@ -234,8 +234,8 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } - if s.config.Expect != 0 { - conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + if s.config.BootstrapExpect != 0 { + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.BootstrapExpect) } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput diff --git a/consul/server_test.go b/consul/server_test.go index 9ad01b4ae..70aa5811f 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -93,7 +93,7 @@ func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { dir, config := testServerConfig(t, name) config.Datacenter = dc config.Bootstrap = false - config.Expect = expect + config.BootstrapExpect = expect server, err := NewServer(config) if err != nil { t.Fatalf("err: %v", err) From 0a2476b20ef8e7c15fc9b300c2b9155f63fb84d8 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 22 Jun 2014 12:49:51 -0700 Subject: [PATCH 12/43] Restore the 0.2 TLS verification behavior. Namely, don't check the DNS names in TLS certificates when connecting to other servers. As of golang 1.3, crypto/tls no longer natively supports doing partial verification (verifying the cert issuer but not the hostname), so we have to disable verification entirely and then do the issuer verification ourselves. Fortunately, crypto/x509 makes this relatively straightforward. If the "server_name" configuration option is passed, we preserve the existing behavior of checking that server name everywhere. No option is provided to retain the current behavior of checking the remote certificate against the local node name, since that behavior seems clearly buggy and unintentional, and I have difficulty imagining it is actually being used anywhere. 
It would be relatively straightforward to restore if desired, however. --- consul/client.go | 6 ++-- consul/config.go | 68 +++++++++++++++++++++++++++++++++++++++---- consul/config_test.go | 43 ++++++++++++++++++++------- consul/pool.go | 6 +++- consul/raft_rpc.go | 5 +++- consul/server.go | 9 ++---- 6 files changed, 109 insertions(+), 28 deletions(-) diff --git a/consul/client.go b/consul/client.go index 5b1ea2946..92d923195 100644 --- a/consul/client.go +++ b/consul/client.go @@ -88,10 +88,8 @@ func NewClient(config *Config) (*Client, error) { // Create the tlsConfig var tlsConfig *tls.Config var err error - if config.VerifyOutgoing { - if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { - return nil, err - } + if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { + return nil, err } // Create a logger diff --git a/consul/config.go b/consul/config.go index 6000177a8..490488049 100644 --- a/consul/config.go +++ b/consul/config.go @@ -172,16 +172,21 @@ func (c *Config) KeyPair() (*tls.Certificate, error) { return &cert, err } -// OutgoingTLSConfig generates a TLS configuration for outgoing requests +// OutgoingTLSConfig generates a TLS configuration for outgoing +// requests. It will return a nil config if this configuration should +// not use TLS for outgoing connections. func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { + if !c.VerifyOutgoing { + return nil, nil + } // Create the tlsConfig tlsConfig := &tls.Config{ - ServerName: c.ServerName, RootCAs: x509.NewCertPool(), - InsecureSkipVerify: !c.VerifyOutgoing, + InsecureSkipVerify: true, } - if tlsConfig.ServerName == "" { - tlsConfig.ServerName = c.NodeName + if c.ServerName != "" { + tlsConfig.ServerName = c.ServerName + tlsConfig.InsecureSkipVerify = false } // Ensure we have a CA if VerifyOutgoing is set @@ -206,6 +211,59 @@ func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { return tlsConfig, nil } +// Wrap a net.Conn into a tls connection, performing any additional +// verification as needed. +// +// As of go 1.3, crypto/tls only supports either doing no certificate +// verification, or doing full verification including of the peer's +// DNS name. For consul, we want to validate that the certificate is +// signed by a known CA, but because consul doesn't use DNS names for +// node names, we don't verify the certificate DNS names. Since go 1.3 +// no longer supports this mode of operation, we have to do it +// manually. +func wrapTLSClient(conn net.Conn, tlsConfig *tls.Config) (net.Conn, error) { + var err error + var tlsConn *tls.Conn + + tlsConn = tls.Client(conn, tlsConfig) + + // If crypto/tls is doing verification, there's no need to do + // our own. + if tlsConfig.InsecureSkipVerify == false { + return tlsConn, nil + } + + if err = tlsConn.Handshake(); err != nil { + tlsConn.Close() + return nil, err + } + + // The following is lightly-modified from the doFullHandshake + // method in crypto/tls's handshake_client.go. 
+ opts := x509.VerifyOptions{ + Roots: tlsConfig.RootCAs, + CurrentTime: time.Now(), + DNSName: "", + Intermediates: x509.NewCertPool(), + } + + certs := tlsConn.ConnectionState().PeerCertificates + for i, cert := range certs { + if i == 0 { + continue + } + opts.Intermediates.AddCert(cert) + } + + _, err = certs[0].Verify(opts) + if err != nil { + tlsConn.Close() + return nil, err + } + + return tlsConn, err +} + // IncomingTLSConfig generates a TLS configuration for incoming requests func (c *Config) IncomingTLSConfig() (*tls.Config, error) { // Create the tlsConfig diff --git a/consul/config_test.go b/consul/config_test.go index c6081603e..dc20cf942 100644 --- a/consul/config_test.go +++ b/consul/config_test.go @@ -78,14 +78,8 @@ func TestConfig_OutgoingTLS_OnlyCA(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - if tls == nil { - t.Fatalf("expected config") - } - if len(tls.RootCAs.Subjects()) != 1 { - t.Fatalf("expect root cert") - } - if !tls.InsecureSkipVerify { - t.Fatalf("expect to skip verification") + if tls != nil { + t.Fatalf("expected no config") } } @@ -104,8 +98,35 @@ func TestConfig_OutgoingTLS_VerifyOutgoing(t *testing.T) { if len(tls.RootCAs.Subjects()) != 1 { t.Fatalf("expect root cert") } + if tls.ServerName != "" { + t.Fatalf("expect no server name verification") + } + if !tls.InsecureSkipVerify { + t.Fatalf("should skip built-in verification") + } +} + +func TestConfig_OutgoingTLS_ServerName(t *testing.T) { + conf := &Config{ + VerifyOutgoing: true, + CAFile: "../test/ca/root.cer", + ServerName: "consul.example.com", + } + tls, err := conf.OutgoingTLSConfig() + if err != nil { + t.Fatalf("err: %v", err) + } + if tls == nil { + t.Fatalf("expected config") + } + if len(tls.RootCAs.Subjects()) != 1 { + t.Fatalf("expect root cert") + } + if tls.ServerName != "consul.example.com" { + t.Fatalf("expect server name") + } if tls.InsecureSkipVerify { - t.Fatalf("should not skip verification") + t.Fatalf("should not skip built-in verification") } } @@ -126,8 +147,8 @@ func TestConfig_OutgoingTLS_WithKeyPair(t *testing.T) { if len(tls.RootCAs.Subjects()) != 1 { t.Fatalf("expect root cert") } - if tls.InsecureSkipVerify { - t.Fatalf("should not skip verification") + if !tls.InsecureSkipVerify { + t.Fatalf("should skip verification") } if len(tls.Certificates) != 1 { t.Fatalf("expected client cert") diff --git a/consul/pool.go b/consul/pool.go index 804a900f5..3b7e80c29 100644 --- a/consul/pool.go +++ b/consul/pool.go @@ -221,7 +221,11 @@ func (p *ConnPool) getNewConn(addr net.Addr, version int) (*Conn, error) { } // Wrap the connection in a TLS client - conn = tls.Client(conn, p.tlsConfig) + conn, err = wrapTLSClient(conn, p.tlsConfig) + if err != nil { + conn.Close() + return nil, err + } } // Switch the multiplexing based on version diff --git a/consul/raft_rpc.go b/consul/raft_rpc.go index 1221ce06f..1024cd987 100644 --- a/consul/raft_rpc.go +++ b/consul/raft_rpc.go @@ -94,7 +94,10 @@ func (l *RaftLayer) Dial(address string, timeout time.Duration) (net.Conn, error } // Wrap the connection in a TLS client - conn = tls.Client(conn, l.tlsConfig) + conn, err = wrapTLSClient(conn, l.tlsConfig) + if err != nil { + return nil, err + } } // Write the Raft byte to set the mode diff --git a/consul/server.go b/consul/server.go index e7dd195f3..8af3fd4ef 100644 --- a/consul/server.go +++ b/consul/server.go @@ -145,12 +145,9 @@ func NewServer(config *Config) (*Server, error) { } // Create the tlsConfig for outgoing connections - var tlsConfig *tls.Config - var err error - if 
config.VerifyOutgoing { - if tlsConfig, err = config.OutgoingTLSConfig(); err != nil { - return nil, err - } + tlsConfig, err := config.OutgoingTLSConfig() + if err != nil { + return nil, err } // Get the incoming tls config From 627b2e455f83d41b2b84cfc4dd5bfc11d6eb3056 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 29 Jun 2014 18:11:32 -0700 Subject: [PATCH 13/43] Add some basic smoke tests for wrapTLSclient. Check the success case, and check that we reject a self-signed certificate. --- consul/config.go | 4 +- consul/config_test.go | 96 ++++++++++++++++++++++++++++++++++ test/key/ssl-cert-snakeoil.key | 28 ++++++++++ test/key/ssl-cert-snakeoil.pem | 17 ++++++ 4 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 test/key/ssl-cert-snakeoil.key create mode 100644 test/key/ssl-cert-snakeoil.pem diff --git a/consul/config.go b/consul/config.go index 490488049..9aa0c8623 100644 --- a/consul/config.go +++ b/consul/config.go @@ -211,8 +211,8 @@ func (c *Config) OutgoingTLSConfig() (*tls.Config, error) { return tlsConfig, nil } -// Wrap a net.Conn into a tls connection, performing any additional -// verification as needed. +// Wrap a net.Conn into a client tls connection, performing any +// additional verification as needed. // // As of go 1.3, crypto/tls only supports either doing no certificate // verification, or doing full verification including of the peer's diff --git a/consul/config_test.go b/consul/config_test.go index dc20cf942..1007ffba7 100644 --- a/consul/config_test.go +++ b/consul/config_test.go @@ -3,6 +3,9 @@ package consul import ( "crypto/tls" "crypto/x509" + "io" + "io/ioutil" + "net" "testing" ) @@ -155,6 +158,99 @@ func TestConfig_OutgoingTLS_WithKeyPair(t *testing.T) { } } +func startTLSServer(config *Config) (net.Conn, chan error) { + errc := make(chan error, 1) + + tlsConfigServer, err := config.IncomingTLSConfig() + if err != nil { + errc <- err + return nil, errc + } + + client, server := net.Pipe() + go func() { + tlsServer := tls.Server(server, tlsConfigServer) + if err := tlsServer.Handshake(); err != nil { + errc <- err + } + close(errc) + // Because net.Pipe() is unbuffered, if both sides + // Close() simultaneously, we will deadlock as they + // both send an alert and then block. So we make the + // server read any data from the client until error or + // EOF, which will allow the client to Close(), and + // *then* we Close() the server. 
+ io.Copy(ioutil.Discard, tlsServer) + tlsServer.Close() + }() + return client, errc +} + +func TestConfig_wrapTLS_OK(t *testing.T) { + config := &Config{ + CAFile: "../test/ca/root.cer", + CertFile: "../test/key/ourdomain.cer", + KeyFile: "../test/key/ourdomain.key", + VerifyOutgoing: true, + } + + client, errc := startTLSServer(config) + if client == nil { + t.Fatalf("startTLSServer err: %v", <-errc) + } + + clientConfig, err := config.OutgoingTLSConfig() + if err != nil { + t.Fatalf("OutgoingTLSConfig err: %v", err) + } + + tlsClient, err := wrapTLSClient(client, clientConfig) + if err != nil { + t.Fatalf("wrapTLS err: %v", err) + } else { + tlsClient.Close() + } + err = <-errc + if err != nil { + t.Fatalf("server: %v", err) + } +} + +func TestConfig_wrapTLS_BadCert(t *testing.T) { + serverConfig := &Config{ + CertFile: "../test/key/ssl-cert-snakeoil.pem", + KeyFile: "../test/key/ssl-cert-snakeoil.key", + } + + client, errc := startTLSServer(serverConfig) + if client == nil { + t.Fatalf("startTLSServer err: %v", <-errc) + } + + clientConfig := &Config{ + CAFile: "../test/ca/root.cer", + VerifyOutgoing: true, + } + + clientTLSConfig, err := clientConfig.OutgoingTLSConfig() + if err != nil { + t.Fatalf("OutgoingTLSConfig err: %v", err) + } + + tlsClient, err := wrapTLSClient(client, clientTLSConfig) + if err == nil { + t.Fatalf("wrapTLS no err") + } + if tlsClient != nil { + t.Fatalf("returned a client") + } + + err = <-errc + if err != nil { + t.Fatalf("server: %v", err) + } +} + func TestConfig_IncomingTLS(t *testing.T) { conf := &Config{ VerifyIncoming: true, diff --git a/test/key/ssl-cert-snakeoil.key b/test/key/ssl-cert-snakeoil.key new file mode 100644 index 000000000..22cc4acb1 --- /dev/null +++ b/test/key/ssl-cert-snakeoil.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDYVw5skn/3Ka72 +32ZaCrKtRVoQzan3tghq41KpQe3yZxIZbKy7sbwfdXnXVSwTAbq/3BYi9rya2t/v +W95yZh6JgfrLBvWl9Jo1EttZIxDhzCXGP+MPWm2KdNtHr84JznJbdxRpR0Jb4ykK +2d9dXbLJvCw8eEDFgOVGrj60USMir46sZFRvGWlMi+yHSOE+WQXaU40Dr0ZJqNvd +RNO9BtqpLaecZQaYTvlkyVdhjUE3+gQ0zEAQqpLcWi+zB5/IyR2+KwxDT3vAJumd +G7rIaGatPE8k0Ahb+zMKFFGYCoQ3sjbAbrQmrVtH4SU6ggl+CxpVdxshrK1W05Ms +WAiPw81/AgMBAAECggEAKjDIKlpjxGMHsTOeNV8yu2H0D6TcSefhOl885q9p5UU+ +nWC5Sx19b7EsYtdEcix7LCGS25y86YJX+8kx16OcvvpvW5ru2z+Zt1IHHxocl7yF +fWVGNd9Pz5m8jf12NClj2fyeKW3xPhROE8Srr/yu+nLNObnF//6EOEWRCv9r176C ++dzYvYVNPP48Ug7NpjQB94CBprtJyqvuoXvBPtpARXazVniYEhnzG1Gaj1TiCII5 ++emaMjKcWIEJ5stbBb3lUtqgm8bRNb/qcxoFfqTzHP+hbum9hbRz0KEIlAkm7uAv +S0TlyLuaj+gPQ+LwNX8EhGKUdlK/VM5bj2kq/tg3AQKBgQD/+A8ruHNa5nKGKNzP +dp+hXiL2sSzefMjDa2+sRJ0yftIMqYRfCJwzYumjfyycfCsu1LHainlQjSO6Kkgc +c0xVxnahWyPCQiqZuo9lLx4EVXCdRqWRg+pbyQhTSz90hfWEKD7XWsI8uRkOEnW8 +36FiyovGDFxl0esaKrFNSFdmgQKBgQDYXcSIRJk41f7vL6FVmchpUnVYoD75k9YT +FqEplNMw6gXcqbC2aNH5wj7EJlRboyVpjXV4N0d2Cz6AwREJpr/rYpq68AixXmVs +kTKwevoHm/tln7CN+CyIEy6KXdLp4KoWLFfSG6tHWRwIGFxWEGrrIZS6Eznu4GPe +V2yOnMkz/wKBgC6nXtSALP5PbGZJgl2J6HR3/PVru5rdsZX0ugjzBJfUh6JpL0hH +AHlZOO5k2pO3CgPiHnyPqqbk4rMmy7frx+kGYE7ulqjseGlGmKY/nT/69qij3L+W +BJwwGwVbfLhXRjWNRE7qKub4cbmf4bfIJtkjw7AYRqsERM6jI2fLnKqBAoGAUBzY +CkSsHxlNXa7bI+DfDfBUNs6OwsZ0e3jjj4vlbrUYGo5SOhgxtzKvHt26Wnvb/Gs+ +VZbSROkA6ZeTAWnWogdOl20NKu9yynIwvJusPGkK+qPYMZj0lCXWE7GNyL9A+xjM +I6XPE4nxESZD+jH2BL3YXdWEm+hF0iu4rE1tSm0CgYEAxssvvX7qcfTmxsp1YSHJ +H5j9ifkakci5W2VbCbdMtdOlgIlCFr2JYguaL98jx7WIJ4iH54ue/fbOdlkPCOsz +YGU4TceSRHeEJ7F6c67NOXm8j2TquAW2uYH87w07g2PIUwl/pp439qoDiThA6jEX +2ztyXgNUi7poqehPUoQuvC0= +-----END PRIVATE KEY----- diff --git 
a/test/key/ssl-cert-snakeoil.pem b/test/key/ssl-cert-snakeoil.pem new file mode 100644 index 000000000..b8ad2c8a6 --- /dev/null +++ b/test/key/ssl-cert-snakeoil.pem @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIICsjCCAZqgAwIBAgIJAMi7aUCplU3VMA0GCSqGSIb3DQEBBQUAMBExDzANBgNV +BAMTBnVidW50dTAeFw0xMjEyMDIwNDQ3MzBaFw0yMjExMzAwNDQ3MzBaMBExDzAN +BgNVBAMTBnVidW50dTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANhX +DmySf/cprvbfZloKsq1FWhDNqfe2CGrjUqlB7fJnEhlsrLuxvB91eddVLBMBur/c +FiL2vJra3+9b3nJmHomB+ssG9aX0mjUS21kjEOHMJcY/4w9abYp020evzgnOclt3 +FGlHQlvjKQrZ311dssm8LDx4QMWA5UauPrRRIyKvjqxkVG8ZaUyL7IdI4T5ZBdpT +jQOvRkmo291E070G2qktp5xlBphO+WTJV2GNQTf6BDTMQBCqktxaL7MHn8jJHb4r +DENPe8Am6Z0bushoZq08TyTQCFv7MwoUUZgKhDeyNsButCatW0fhJTqCCX4LGlV3 +GyGsrVbTkyxYCI/DzX8CAwEAAaMNMAswCQYDVR0TBAIwADANBgkqhkiG9w0BAQUF +AAOCAQEAQaS5yAih5NBV2edX1wkIQfAUElqmzoXvxsozDYy+P+S5tJeFXDSqzTAy +qkd/6qjkBdaARfKUJZeT/jRjqxoNtE9SR4PMOnD4zrqD26ujgZRVtPImbmVxCnMI +1B9LwvhpDHZuPGN5bPp3o+iDYea8zkS3Y31Ic889KSwKBDb1LlNogOdved+2DGd1 +yCxEErImbl4B0+QPrRk2bWbDfKhDfJ2FV+9kWIoEuCQBpr2tj1E5zvTadOVm5P2M +u7kjGl4w0GIAONiMC9l2TwMmPuG1jpM/WjQkG0sTKOCl7xQKgXBNJ78Wm2bfGtgb +shr/PNbS/EyISlUa07+zJtiRnr/EiQ== +-----END CERTIFICATE----- From 84adb708db3260c8ad63a8dcd481d22110ca0275 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Sun, 29 Jun 2014 18:22:44 -0700 Subject: [PATCH 14/43] Fix a Makefile typo. The broken .PHONY declaration is breaking Travis: 'make deps' is now a no-op, because of the new 'deps' directory. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e4a54626..8d8e2f078 100644 --- a/Makefile +++ b/Makefile @@ -30,4 +30,4 @@ web: web-push: ./scripts/website_push.sh -.PNONY: all cov deps integ test web web-push +.PHONY: all cov deps integ test web web-push From 358b473e01ed4ec6c3eb7c467ac7751657d859fa Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 1 Jul 2014 15:02:26 -0700 Subject: [PATCH 15/43] Updating documentation for new bootstrap method --- command/agent/command.go | 2 +- .../source/docs/agent/basics.html.markdown | 3 +- .../source/docs/agent/options.html.markdown | 17 ++-- .../docs/guides/bootstrapping.html.markdown | 84 ++++++++----------- .../source/docs/guides/outage.html.markdown | 2 +- .../source/docs/guides/servers.html.markdown | 3 +- .../intro/getting-started/agent.html.markdown | 13 ++- .../intro/getting-started/join.html.markdown | 9 +- .../getting-started/services.html.markdown | 2 +- 9 files changed, 64 insertions(+), 71 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index f1f7b0d0b..6410383ea 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -537,6 +537,7 @@ Options: -advertise=addr Sets the advertise address to use -bootstrap Sets server to bootstrap mode -bind=0.0.0.0 Sets the bind address for cluster communication + -bootstrap-expect=0 Sets server to expect bootstrap mode. -client=127.0.0.1 Sets the address to bind for client access. This includes RPC, DNS and HTTP -config-file=foo Path to a JSON file to read configuration from. @@ -547,7 +548,6 @@ Options: order. -data-dir=path Path to a data directory to store agent state -dc=east-aws Datacenter of the agent - -expect=0 Sets server to expect bootstrap mode. -join=1.2.3.4 Address of an agent to join at start time. Can be specified multiple times. -log-level=info Log level of the agent. 
diff --git a/website/source/docs/agent/basics.html.markdown b/website/source/docs/agent/basics.html.markdown index ba05f8a19..a63d90a04 100644 --- a/website/source/docs/agent/basics.html.markdown +++ b/website/source/docs/agent/basics.html.markdown @@ -57,8 +57,7 @@ There are several important components that `consul agent` outputs: * **Server**: This shows if the agent is running in the server or client mode. Server nodes have the extra burden of participating in the consensus quorum, storing cluster state, and handling queries. Additionally, a server may be - in "bootstrap" mode. The first server must be in this mode to allow additional - servers to join the cluster. Multiple servers cannot be in bootstrap mode, + in "bootstrap" mode. Multiple servers cannot be in bootstrap mode, otherwise the cluster state will be inconsistent. * **Client Addr**: This is the address used for client interfaces to the agent. diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown index c134b73a5..bfafab5e8 100644 --- a/website/source/docs/agent/options.html.markdown +++ b/website/source/docs/agent/options.html.markdown @@ -35,11 +35,16 @@ The options below are all specified on the command-line. as other nodes will treat the non-routability as a failure. * `-bootstrap` - This flag is used to control if a server is in "bootstrap" mode. It is important that - no more than one server *per* datacenter be running in this mode. The initial server **must** be in bootstrap - mode. Technically, a server in bootstrap mode is allowed to self-elect as the Raft leader. It is important - that only a single node is in this mode, because otherwise consistency cannot be guaranteed if multiple - nodes are able to self-elect. Once there are multiple servers in a datacenter, it is generally a good idea - to disable bootstrap mode on all of them. + no more than one server *per* datacenter be running in this mode. Technically, a server in bootstrap mode + is allowed to self-elect as the Raft leader. It is important that only a single node is in this mode, + because otherwise consistency cannot be guaranteed if multiple nodes are able to self-elect. + It is not recommended to use this flag after a cluster has been bootstrapped. + +* `-bootstrap-expect` - This flag provides the number of expected servers in the datacenter. + Either this value should not be provided, or the value must agree with other servers in + the cluster. When provided, Consul waits until the specified number of servers are + available, and then bootstraps the cluster. This allows an initial leader to be elected + automatically. This cannot be used in conjunction with the `-bootstrap` flag. * `-bind` - The address that should be bound to for internal cluster communications. This is an IP address that should be reachable by all other nodes in the cluster. @@ -148,6 +153,8 @@ definitions support being updated during a reload. * `bootstrap` - Equivalent to the `-bootstrap` command-line flag. +* `bootstrap_expect` - Equivalent to the `-bootstrap-expect` command-line flag. + * `bind_addr` - Equivalent to the `-bind` command-line flag. * `client_addr` - Equivalent to the `-client` command-line flag. 
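To make the `-bootstrap-expect` rule above concrete, here is a minimal, self-contained Go sketch of the gating logic it describes: do not bootstrap until the expected number of servers is known, and never bootstrap twice. This is an illustration only; `shouldBootstrap` is a hypothetical helper, not the actual Consul implementation.

```
package main

import "fmt"

// shouldBootstrap captures the "-bootstrap-expect" rule: bootstrap only once,
// and only after the expected number of server nodes has joined.
func shouldBootstrap(expect, knownServers int, alreadyBootstrapped bool) bool {
	if expect == 0 || alreadyBootstrapped {
		return false // expect mode disabled, or a peer set already exists
	}
	return knownServers >= expect
}

func main() {
	fmt.Println(shouldBootstrap(3, 2, false)) // false: still waiting for the third server
	fmt.Println(shouldBootstrap(3, 3, false)) // true: all expected servers have joined
	fmt.Println(shouldBootstrap(3, 3, true))  // false: already bootstrapped
}
```

Because the gate only makes sense if every server uses the same threshold, the documentation above requires all servers to agree on the `-bootstrap-expect` value or to omit it entirely.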
diff --git a/website/source/docs/guides/bootstrapping.html.markdown b/website/source/docs/guides/bootstrapping.html.markdown index 6339e59cc..472a949f4 100644 --- a/website/source/docs/guides/bootstrapping.html.markdown +++ b/website/source/docs/guides/bootstrapping.html.markdown @@ -6,74 +6,62 @@ sidebar_current: "docs-guides-bootstrapping" # Bootstrapping a Datacenter -When deploying Consul to a datacenter for the first time, there is an initial bootstrapping that -must be done. Generally, the first nodes that are started are the server nodes. Remember that an -agent can run in both client and server mode. Server nodes are responsible for running +Before a Consul cluster can begin to service requests, it is necessary for a server node to +be elected leader. For this reason, the first nodes that are started are generally the server nodes. +Remember that an agent can run in both client and server mode. Server nodes are responsible for running the [consensus protocol](/docs/internals/consensus.html), and storing the cluster state. The client nodes are mostly stateless and rely on the server nodes, so they can be started easily. -The first server that is deployed in a new datacenter must provide the `-bootstrap` [configuration -option](/docs/agent/options.html). This option allows the server to assert leadership of the cluster -without agreement from any other server. This is necessary because at this point, there are no other -servers running in the datacenter! Lets call this first server `Node A`. When starting `Node A` something -like the following will be logged: +The recommended way to bootstrap is to use the `-bootstrap-expect` [configuration +option](/docs/agent/options.html). This option informs Consul of the expected number of +server nodes, and automatically bootstraps when that many servers are available. To prevent +inconsistencies and split-brain situations, all servers should specify the same value for `-bootstrap-expect` +or specify no value at all. Any server that does not specify a value will not attempt to +bootstrap the cluster. - 2014/02/22 19:23:32 [INFO] consul: cluster leadership acquired +There is a [deployment table](/docs/internals/consensus.html#toc_3) that covers various options, +but it is recommended to have 3 or 5 total servers per data center. A single server deployment is _**highly**_ +discouraged as data loss is inevitable in a failure scenario. -Once `Node A` is running, we can start the next set of servers. There is a [deployment table](/docs/internals/consensus.html#toc_3) -that covers various options, but it is recommended to have 3 or 5 total servers per data center. -A single server deployment is _**highly**_ discouraged as data loss is inevitable in a failure scenario. -We start the next servers **without** specifying `-bootstrap`. This is critical, since only one server -should ever be running in bootstrap mode*. Once `Node B` and `Node C` are started, you should see a -message to the effect of: +Suppose we are starting a 3 server cluster. We can start `Node A`, `Node B` and `Node C`, providing +the `-bootstrap-expect 3` flag. Once the nodes are started, you should see a message to the effect of: [WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election. -This indicates that the node is not in bootstrap mode, and it will not elect itself as leader. -We can now join these machines together. Since a join operation is symmetric it does not matter -which node initiates it.
From `Node B` and `Node C` you can do the following: +This indicates that the nodes are expecting 2 peers, but none are known yet. The servers will not elect +themselves leader to prevent a split-brain. We can now join these machines together. Since a join operation +is symmetric it does not matter which node initiates it. From any node you can do the following: - $ consul join - Successfully joined cluster by contacting 1 nodes. + $ consul join + Successfully joined cluster by contacting 3 nodes. -Alternatively, from `Node A` you can do the following: +Once the join is successful, one of the nodes will output something like: - $ consul join - Successfully joined cluster by contacting 2 nodes. + [INFO] consul: adding server foo (Addr: 127.0.0.2:8300) (DC: dc1) + [INFO] consul: adding server bar (Addr: 127.0.0.1:8300) (DC: dc1) + [INFO] consul: Attempting bootstrap with nodes: [127.0.0.3:8300 127.0.0.2:8300 127.0.0.1:8300] + ... + [INFO] consul: cluster leadership acquired -Once the join is successful, `Node A` should output something like: - - [INFO] raft: Added peer 127.0.0.2:8300, starting replication - .... - [INFO] raft: Added peer 127.0.0.3:8300, starting replication - -Another good check is to run the `consul info` command. When run on `Node A`, you can +As a sanity check, the `consul info` command is a useful tool. It can be used to verify `raft.num_peers` is now 2, and you can view the latest log index under `raft.last_log_index`. -When running `consul info` on `Node B` and `Node C` you should see `raft.last_log_index` +When running `consul info` on the followers, you should see `raft.last_log_index` converge to the same value as the leader begins replication. That value represents the last log entry that has been stored on disk. -This indicates that `Node B` and `Node C` have been added as peers. At this point, -all three nodes see each other as peers, `Node A` is the leader, and replication -should be working. - -The final step is to remove the `-bootstrap` flag. This is important since we don't -want the node to be able to make unilateral decisions in the case of a failure of the -other two nodes. To do this, we send a `SIGINT` to `Node A` to allow it to perform -a graceful leave. Then we remove the `-bootstrap` flag and restart the node. The node -will need to rejoin the cluster, since the graceful exit leaves the cluster. Any transactions -that took place while `Node A` was offline will be replicated and the node will catch up. - Now that the servers are all started and replicating to each other, all the remaining clients can be joined. Clients are much easier, as they can be started and perform a `join` against any existing node. All nodes participate in a gossip protocol to perform basic discovery, so clients will automatically find the servers and register themselves. -
-* If you accidentally start another server with the flag set, do not fret. -Shutdown the node, and remove the `raft/` folder from the data directory. This will -remove the bad state caused by being in `-bootstrap` mode. Then restart the -node and join the cluster normally. -
+It should be noted that it is not strictly necessary to start the server nodes +before the clients, however most operations will fail until the servers are available. + +## Manual Bootstrapping + +In versions of Consul previous to 0.4, bootstrapping was a more manual process. +For a guide on using the `-bootstrap` flag directly, see the [manual bootstrapping guide](/docs/guides/manual-bootstrap.html). + +This is not recommended, as it is more error prone than automatic bootstrapping. diff --git a/website/source/docs/guides/outage.html.markdown b/website/source/docs/guides/outage.html.markdown index 13f437917..893cd6694 100644 --- a/website/source/docs/guides/outage.html.markdown +++ b/website/source/docs/guides/outage.html.markdown @@ -18,7 +18,7 @@ add or remove a server see this page. If you had only a single server and it has failed, simply restart it. -Note that a single server configuration requires the `-bootstrap` flag. +Note that a single server configuration requires the `-bootstrap` or `-bootstrap-expect 1` flag. If that server cannot be recovered, you need to bring up a new server. See the [bootstrapping guide](/docs/guides/bootstrapping.html). Data loss is inevitable, since data was not replicated to any other servers. This diff --git a/website/source/docs/guides/servers.html.markdown b/website/source/docs/guides/servers.html.markdown index 64b4583f0..9cf535bed 100644 --- a/website/source/docs/guides/servers.html.markdown +++ b/website/source/docs/guides/servers.html.markdown @@ -18,8 +18,7 @@ to first add the new nodes and then remove the old nodes. ## Adding New Servers -Adding new servers is generally straightforward. After the initial server, no further -servers should ever be started with the `-bootstrap` flag. Instead, simply start the new +Adding new servers is generally straightforward. Simply start the new server with the `-server` flag. At this point, the server will not be a member of any cluster, and should emit something like: diff --git a/website/source/intro/getting-started/agent.html.markdown b/website/source/intro/getting-started/agent.html.markdown index d9709eb16..1a75c7544 100644 --- a/website/source/intro/getting-started/agent.html.markdown +++ b/website/source/intro/getting-started/agent.html.markdown @@ -20,7 +20,8 @@ will be part of the cluster. For simplicity, we'll run a single Consul agent in server mode right now: ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul +==> WARNING: BootstrapExpect Mode is specified as 1; this is the same as Bootstrap mode. ==> WARNING: Bootstrap mode enabled! Do not enable unless necessary ==> WARNING: It is highly recommended to set GOMAXPROCS higher than 1 ==> Starting Consul agent... @@ -67,15 +68,13 @@ joining clusters in the next section. ``` $ consul members -Armons-MacBook-Air 10.1.10.38:8301 alive role=consul,dc=dc1,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1 +Node Address Status Type Build Protocol +Armons-MacBook-Air 10.1.10.38:8301 alive server 0.3.0 2 ``` The output shows our own node, the address it is running on, its -health state, and some metadata associated with the node. Some important -metadata keys to recognize are the `role` and `dc` keys. These tell you -the service name and the datacenter that member is within. These can be -used to lookup nodes and services using the DNS interface, which is covered -shortly. +health state, its role in the cluster, as well as some versioning information. 
+Additional metadata can be viewed by providing the `-detailed` flag. The output from the `members` command is generated based on the [gossip protocol](/docs/internals/gossip.html) and is eventually consistent. diff --git a/website/source/intro/getting-started/join.html.markdown b/website/source/intro/getting-started/join.html.markdown index e369b9102..53bc44fa6 100644 --- a/website/source/intro/getting-started/join.html.markdown +++ b/website/source/intro/getting-started/join.html.markdown @@ -34,7 +34,7 @@ will act as our server in this cluster. We're still not making a cluster of servers. ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul \ +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul \ -node=agent-one -bind=172.20.20.10 ... ``` @@ -70,9 +70,10 @@ run `consul members` against each agent, you'll see that both agents now know about each other: ``` -$ consul members -agent-one 172.20.20.10:8301 alive role=consul,dc=dc1,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1 -agent-two 172.20.20.11:8301 alive role=node,dc=dc1,vsn=1,vsn_min=1,vsn_max=1 +$ consul members -detailed +Node Address Status Tags +agent-one 172.20.20.10:8301 alive role=consul,dc=dc1,vsn=2,vsn_min=1,vsn_max=2,port=8300,bootstrap=1 +agent-two 172.20.20.11:8301 alive role=node,dc=dc1,vsn=2,vsn_min=1,vsn_max=2 ```
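The `role` tag in the `-detailed` output above is how agents advertise their mode to the rest of the gossip pool: servers carry `role=consul` while clients carry `role=node`. The following is a small, self-contained Go sketch of that distinction; `isServer` is a hypothetical helper written for illustration, not Consul source.

```
package main

import "fmt"

// isServer mirrors the tag convention visible in `consul members -detailed`:
// server agents gossip role=consul, client agents gossip role=node.
func isServer(tags map[string]string) bool {
	return tags["role"] == "consul"
}

func main() {
	agentOne := map[string]string{"role": "consul", "dc": "dc1", "port": "8300"}
	agentTwo := map[string]string{"role": "node", "dc": "dc1"}
	fmt.Println(isServer(agentOne)) // true: agent-one runs in server mode
	fmt.Println(isServer(agentTwo)) // false: agent-two is a client
}
```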
diff --git a/website/source/intro/getting-started/services.html.markdown b/website/source/intro/getting-started/services.html.markdown index f67cf31d9..8d1329cdb 100644 --- a/website/source/intro/getting-started/services.html.markdown +++ b/website/source/intro/getting-started/services.html.markdown @@ -43,7 +43,7 @@ $ echo '{"service": {"name": "web", "tags": ["rails"], "port": 80}}' \ Now, restart the agent we're running, providing the configuration directory: ``` -$ consul agent -server -bootstrap -data-dir /tmp/consul -config-dir /etc/consul.d +$ consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul -config-dir /etc/consul.d ==> Starting Consul agent... ... [INFO] agent: Synced service 'web' From 3b6e3d184641e02e3bcddfa59ab322810dfae6df Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 1 Jul 2014 15:02:42 -0700 Subject: [PATCH 16/43] website: Keep old bootstrapping docs --- .../guides/manual-bootstrap.html.markdown | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 website/source/docs/guides/manual-bootstrap.html.markdown diff --git a/website/source/docs/guides/manual-bootstrap.html.markdown b/website/source/docs/guides/manual-bootstrap.html.markdown new file mode 100644 index 000000000..f1375031e --- /dev/null +++ b/website/source/docs/guides/manual-bootstrap.html.markdown @@ -0,0 +1,83 @@ +--- +layout: "docs" +page_title: "Manual Bootstrapping" +sidebar_current: "docs-guides-bootstrapping" +--- + +# Manually Bootstrapping a Datacenter + +When deploying Consul to a datacenter for the first time, there is an initial bootstrapping that +must be done. As of Consul 0.4, an [automatic bootstrapping](/docs/guides/bootstrapping.html) is +available and is the recommended approach. However, older versions only support a manual bootstrap +that is documented here. + +Generally, the first nodes that are started are the server nodes. Remember that an +agent can run in both client and server mode. Server nodes are responsible for running +the [consensus protocol](/docs/internals/consensus.html), and storing the cluster state. +The client nodes are mostly stateless and rely on the server nodes, so they can be started easily. + +Manual bootstrapping requires that the first server that is deployed in a new datacenter provide +the `-bootstrap` [configuration option](/docs/agent/options.html). This option allows the server to +assert leadership of the cluster without agreement from any other server. This is necessary because +at this point, there are no other servers running in the datacenter! Lets call this first server `Node A`. +When starting `Node A` something like the following will be logged: + + 2014/02/22 19:23:32 [INFO] consul: cluster leadership acquired + +Once `Node A` is running, we can start the next set of servers. There is a [deployment table](/docs/internals/consensus.html#toc_3) +that covers various options, but it is recommended to have 3 or 5 total servers per data center. +A single server deployment is _**highly**_ discouraged as data loss is inevitable in a failure scenario. +We start the next servers **without** specifying `-bootstrap`. This is critical, since only one server +should ever be running in bootstrap mode*. Once `Node B` and `Node C` are started, you should see a +message to the effect of: + + [WARN] raft: EnableSingleNode disabled, and no known peers. Aborting election. + +This indicates that the node is not in bootstrap mode, and it will not elect itself as leader. +We can now join these machines together. 
Since a join operation is symmetric it does not matter +which node initiates it. From `Node B` and `Node C` you can do the following: + + $ consul join + Successfully joined cluster by contacting 1 nodes. + +Alternatively, from `Node A` you can do the following: + + $ consul join + Successfully joined cluster by contacting 2 nodes. + +Once the join is successful, `Node A` should output something like: + + [INFO] raft: Added peer 127.0.0.2:8300, starting replication + .... + [INFO] raft: Added peer 127.0.0.3:8300, starting replication + +As a sanity check, the `consul info` command is a useful tool. It can be used to +verify `raft.num_peers` is now 2, and you can view the latest log index under `raft.last_log_index`. +When running `consul info` on the followers, you should see `raft.last_log_index` +converge to the same value as the leader begins replication. That value represents the last +log entry that has been stored on disk. + +This indicates that `Node B` and `Node C` have been added as peers. At this point, +all three nodes see each other as peers, `Node A` is the leader, and replication +should be working. + +The final step is to remove the `-bootstrap` flag. This is important since we don't +want the node to be able to make unilateral decisions in the case of a failure of the +other two nodes. To do this, we send a `SIGINT` to `Node A` to allow it to perform +a graceful leave. Then we remove the `-bootstrap` flag and restart the node. The node +will need to rejoin the cluster, since the graceful exit leaves the cluster. Any transactions +that took place while `Node A` was offline will be replicated and the node will catch up. + +Now that the servers are all started and replicating to each other, all the remaining +clients can be joined. Clients are much easier, as they can be started and perform +a `join` against any existing node. All nodes participate in a gossip protocol to +perform basic discovery, so clients will automatically find the servers and register +themselves. + +
+* If you accidentally start another server with the flag set, do not fret. +Shut down the node, and remove the `raft/` folder from the data directory. This will +remove the bad state caused by being in `-bootstrap` mode. Then restart the +node and join the cluster normally. +
+ From 14e282dca8006b221a7a6d95e8ab9e18c95fccde Mon Sep 17 00:00:00 2001 From: Ben Scofield Date: Wed, 2 Jul 2014 21:21:37 +0200 Subject: [PATCH 17/43] Improve clarity around changing the `-client` param This is a small change that (IMO) makes more explicit the effect of changing `-client` -- namely, that you'll have to provide `-rpc-addr` to every other consul command you run. --- website/source/docs/agent/basics.html.markdown | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/basics.html.markdown b/website/source/docs/agent/basics.html.markdown index a63d90a04..9061c5d20 100644 --- a/website/source/docs/agent/basics.html.markdown +++ b/website/source/docs/agent/basics.html.markdown @@ -65,9 +65,9 @@ There are several important components that `consul agent` outputs: address is used for other `consul` commands. Other Consul commands such as `consul members` connect to a running agent and use RPC to query and control the agent. By default, this binds only to localhost. If you - change this address or port, you'll have to specify an `-rpc-addr` to commands - such as `consul members` so they know how to talk to the agent. This is also - the address other applications can use over [RPC to control Consul](/docs/agent/rpc.html). + change this address or port, you'll have to specify an `-rpc-addr` whenever + you run commands such as `consul members` so they know how to talk to the + agent. This is also the address other applications can use over [RPC to control Consul](/docs/agent/rpc.html). * **Cluster Addr**: This is the address and ports used for communication between Consul agents in a cluster. Every Consul agent in a cluster does not have to From ba8c0802c748c9309c31aafa300a57cdec87f77a Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Sat, 5 Jul 2014 09:49:10 -0700 Subject: [PATCH 18/43] agent: Fixing passing filter. 
Fixes #241 --- command/agent/health_endpoint.go | 2 ++ command/agent/health_endpoint_test.go | 38 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/command/agent/health_endpoint.go b/command/agent/health_endpoint.go index 8462e0a4b..3ee02ac5e 100644 --- a/command/agent/health_endpoint.go +++ b/command/agent/health_endpoint.go @@ -117,6 +117,7 @@ func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Requ // filterNonPassing is used to filter out any nodes that have check that are not passing func filterNonPassing(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { n := len(nodes) +OUTER: for i := 0; i < n; i++ { node := nodes[i] for _, check := range node.Checks { @@ -124,6 +125,7 @@ func filterNonPassing(nodes structs.CheckServiceNodes) structs.CheckServiceNodes nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} n-- i-- + continue OUTER } } } diff --git a/command/agent/health_endpoint_test.go b/command/agent/health_endpoint_test.go index c1f75a269..40ceedb0f 100644 --- a/command/agent/health_endpoint_test.go +++ b/command/agent/health_endpoint_test.go @@ -7,6 +7,7 @@ import ( "net/http" "net/http/httptest" "os" + "reflect" "testing" ) @@ -182,3 +183,40 @@ func TestHealthServiceNodes_PassingFilter(t *testing.T) { t.Fatalf("bad: %v", obj) } } + +func TestFilterNonPassing(t *testing.T) { + nodes := structs.CheckServiceNodes{ + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + }, + }, + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + &structs.HealthCheck{ + Status: structs.HealthCritical, + }, + }, + }, + structs.CheckServiceNode{ + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Status: structs.HealthPassing, + }, + }, + }, + } + expected := nodes[2] + out := filterNonPassing(nodes) + if len(out) != 1 || !reflect.DeepEqual(out[0], expected) { + t.Fatalf("bad: %v", out) + } +} From c505b49bf82f8ace930008bb8ea3b377d1856ed5 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Thu, 10 Jul 2014 18:06:36 -0700 Subject: [PATCH 19/43] There is no `-statsite` command-line flag. --- website/source/docs/agent/telemetry.html.markdown | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/telemetry.html.markdown b/website/source/docs/agent/telemetry.html.markdown index 473e2832d..1035304cc 100644 --- a/website/source/docs/agent/telemetry.html.markdown +++ b/website/source/docs/agent/telemetry.html.markdown @@ -18,9 +18,10 @@ information to the stderr of the agent. In general, the telemetry information is used for debugging or otherwise getting a better view into what Consul is doing. -Additionally, if the `-statsite` [option](/docs/agent/options.html) is provided, -then the telemetry information will be streamed to a [statsite](http://github.com/armon/statsite) -server where it can be aggregate and flushed to Graphite or any other metrics store. +Additionally, if the `statsite_addr` [configuration option](/docs/agent/options.html) +is provided, then the telemetry information will be streamed to a +[statsite](http://github.com/armon/statsite) server where it can be +aggregated and flushed to Graphite or any other metrics store.
Below is an example output: From 95f3ebcb434f4ad1befddc0bdb5929f1180e6c45 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Thu, 10 Jul 2014 18:16:47 -0700 Subject: [PATCH 20/43] While we're at it, make statsite_addr work at all. --- command/agent/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command/agent/config.go b/command/agent/config.go index f08d545b1..9a6a043f8 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -500,6 +500,9 @@ func MergeConfig(a, b *Config) *Config { if b.SkipLeaveOnInt == true { result.SkipLeaveOnInt = true } + if b.StatsiteAddr != "" { + result.StatsiteAddr = b.StatsiteAddr + } if b.EnableDebug { result.EnableDebug = true } From 8b8184d942fa834eccd2c2cee5a28515ae13a343 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 15 Jul 2014 09:50:39 -0700 Subject: [PATCH 21/43] website: remove old docs on the -encrypt flag --- website/source/docs/agent/encryption.html.markdown | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/source/docs/agent/encryption.html.markdown b/website/source/docs/agent/encryption.html.markdown index 174a2aab4..5671628b8 100644 --- a/website/source/docs/agent/encryption.html.markdown +++ b/website/source/docs/agent/encryption.html.markdown @@ -28,7 +28,10 @@ With that key, you can enable encryption on the agent. You can verify encryption is enabled because the output will include "Encrypted: true". ``` -$ consul agent -data=/tmp/consul -encrypt=cg8StVXbQJ0gPvMd9o7yrg== +$ cat encrypt.json +{"encrypt": "cg8StVXbQJ0gPvMd9o7yrg=="} + +$ consul agent -data=/tmp/consul -config-file encrypt.json ==> Starting Consul agent... ==> Starting Consul agent RPC... ==> Consul agent running! From c13f9f2a98915ae821b9dcf71d1a2d00480058d8 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Wed, 16 Jul 2014 15:11:45 -0700 Subject: [PATCH 22/43] agent: Fixing issue with excessive failed node filtering --- command/agent/dns.go | 2 ++ command/agent/dns_test.go | 42 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 697f314fc..4a0bd14f0 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -471,6 +471,7 @@ RPC: // health checks to prevent routing to unhealthy nodes func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { n := len(nodes) +OUTER: for i := 0; i < n; i++ { node := nodes[i] for _, check := range node.Checks { @@ -480,6 +481,7 @@ func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs. 
nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} n-- i-- + continue OUTER } } } diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index 3a2804a4c..cf1ae791f 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -651,6 +651,40 @@ func TestDNS_ServiceLookup_FilterCritical(t *testing.T) { t.Fatalf("err: %v", err) } + args3 := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.2", + Service: &structs.NodeService{ + Service: "db", + Tags: []string{"master"}, + Port: 12345, + }, + Check: &structs.HealthCheck{ + CheckID: "db", + Name: "db", + ServiceID: "db", + Status: structs.HealthCritical, + }, + } + if err := srv.agent.RPC("Catalog.Register", args3, &out); err != nil { + t.Fatalf("err: %v", err) + } + + args4 := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "baz", + Address: "127.0.0.3", + Service: &structs.NodeService{ + Service: "db", + Tags: []string{"master"}, + Port: 12345, + }, + } + if err := srv.agent.RPC("Catalog.Register", args4, &out); err != nil { + t.Fatalf("err: %v", err) + } + m := new(dns.Msg) m.SetQuestion("db.service.consul.", dns.TypeANY) @@ -662,9 +696,15 @@ func TestDNS_ServiceLookup_FilterCritical(t *testing.T) { } // Should get no answer since we are failing! - if len(in.Answer) != 0 { + if len(in.Answer) != 1 { t.Fatalf("Bad: %#v", in) } + + resp := in.Answer[0] + aRec := resp.(*dns.A) + if aRec.A.String() != "127.0.0.3" { + t.Fatalf("Bad: %#v", in.Answer[0]) + } } func TestDNS_ServiceLookup_Randomize(t *testing.T) { From a38eaa1e43a121b10730bae6ef9c68f34867e3ca Mon Sep 17 00:00:00 2001 From: Jack Pearkes Date: Thu, 17 Jul 2014 09:48:38 -0400 Subject: [PATCH 23/43] ui: scroll extra items on nodes/services fixes #232 --- ui/styles/_lists.scss | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/styles/_lists.scss b/ui/styles/_lists.scss index 8bded01e9..7e3cfee6f 100644 --- a/ui/styles/_lists.scss +++ b/ui/styles/_lists.scss @@ -16,6 +16,8 @@ padding-left: 0px; color: $gray; font-size: 13px; + overflow: scroll; + height: 30px; } .list-group-item-heading { From 84e5bd27274b9a6985c9352e2e0a549982e19862 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Thu, 17 Jul 2014 10:57:15 -0700 Subject: [PATCH 24/43] agent: Provide better DNS setup error messages --- command/agent/dns.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 4a0bd14f0..522376e15 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -84,14 +84,14 @@ func NewDNSServer(agent *Agent, config *DNSConfig, logOutput io.Writer, domain, go func() { err := server.ListenAndServe() srv.logger.Printf("[ERR] dns: error starting udp server: %v", err) - errCh <- err + errCh <- fmt.Errorf("dns udp setup failed: %v", err) }() errChTCP := make(chan error, 1) go func() { err := serverTCP.ListenAndServe() srv.logger.Printf("[ERR] dns: error starting tcp server: %v", err) - errChTCP <- err + errChTCP <- fmt.Errorf("dns tcp setup failed: %v", err) }() // Check the server is running, do a test lookup @@ -107,7 +107,7 @@ func NewDNSServer(agent *Agent, config *DNSConfig, logOutput io.Writer, domain, c := new(dns.Client) in, _, err := c.Exchange(m, bind) if err != nil { - checkCh <- err + checkCh <- fmt.Errorf("dns test query failed: %v", err) return } From 541081cefb6f6c46d1da6cbccf5b3f9bc8e5a72f Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 10:58:37 -0400 Subject: [PATCH 25/43] Cutting v0.3.1 --- CHANGELOG.md | 11 ++++++++++- 
bench/bench-aws.json | 4 ++-- bench/bench.json | 4 ++-- demo/vagrant-cluster/Vagrantfile | 2 +- version.go | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6834119..c2954ce23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,23 @@ -## 0.3.1 (Unreleased) +## 0.3.1 (July 21, 2014) + +FEATURES: + + * Improved bootstrapping process, thanks to @robxu9 BUG FIXES: * Fixed issue with service re-registration [GH-216] * Fixed handling of `-rejoin` flag + * Restored 0.2 TLS behavior, thanks to @nelhage [GH-233] + * Fixed the statsite flags, thanks to @nelhage [GH-243] + * Fixed filters on critical / non-passing checks [GH-241] IMPROVEMENTS: + * UI Improvements * Improved handling of Serf snapshot data * Increase reliability of failure detector + * More useful logging messages ## 0.3.0 (June 13, 2014) diff --git a/bench/bench-aws.json b/bench/bench-aws.json index 2adb91ccd..0cd03a14e 100644 --- a/bench/bench-aws.json +++ b/bench/bench-aws.json @@ -53,8 +53,8 @@ "sudo mkdir /etc/consul.d", "sudo apt-get update", "sudo apt-get install unzip make", - "wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip", - "unzip 0.3.0_linux_amd64.zip", + "wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip", + "unzip 0.3.1_linux_amd64.zip", "sudo mv consul /usr/local/bin/consul", "chmod +x /usr/local/bin/consul" ] diff --git a/bench/bench.json b/bench/bench.json index c8b291055..67603072f 100644 --- a/bench/bench.json +++ b/bench/bench.json @@ -47,8 +47,8 @@ "mkdir /etc/consul.d", "apt-get update", "apt-get install unzip make", - "wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip", - "unzip 0.3.0_linux_amd64.zip", + "wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip", + "unzip 0.3.1_linux_amd64.zip", "mv consul /usr/local/bin/consul", "chmod +x /usr/local/bin/consul" ] diff --git a/demo/vagrant-cluster/Vagrantfile b/demo/vagrant-cluster/Vagrantfile index 5678c544f..427f06e2e 100644 --- a/demo/vagrant-cluster/Vagrantfile +++ b/demo/vagrant-cluster/Vagrantfile @@ -7,7 +7,7 @@ sudo apt-get install -y unzip echo Fetching Consul... cd /tmp/ -wget https://dl.bintray.com/mitchellh/consul/0.3.0_linux_amd64.zip -O consul.zip +wget https://dl.bintray.com/mitchellh/consul/0.3.1_linux_amd64.zip -O consul.zip echo Installing Consul... unzip consul.zip diff --git a/version.go b/version.go index 38fc1914e..a38a77a3f 100644 --- a/version.go +++ b/version.go @@ -4,7 +4,7 @@ package main var GitCommit string // The main version number that is being run at the moment. -const Version = "0.3.0" +const Version = "0.3.1" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release From ed298b0336543197528cd126542f7a11ab505037 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 11:26:53 -0400 Subject: [PATCH 26/43] website: Update dummy --- website/dummy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/dummy.txt b/website/dummy.txt index 6cd25eecc..c04dff067 100644 --- a/website/dummy.txt +++ b/website/dummy.txt @@ -1,4 +1,4 @@ This file doesn't do anything, but we periodically update the number below just to force being able to deploy the website again.
-1 +2 From 9936ec93b6b570a635807d1f7302420fc309e647 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 21 Jul 2014 11:34:10 -0400 Subject: [PATCH 27/43] Adding the deps file for 0.3.1 --- deps/v0-3-1.json | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 deps/v0-3-1.json diff --git a/deps/v0-3-1.json b/deps/v0-3-1.json new file mode 100644 index 000000000..14e4551e1 --- /dev/null +++ b/deps/v0-3-1.json @@ -0,0 +1,72 @@ +{ + "ImportPath": "github.com/hashicorp/consul", + "GoVersion": "go1.3", + "Deps": [ + { + "ImportPath": "github.com/armon/circbuf", + "Rev": "f092b4f207b6e5cce0569056fba9e1a2735cb6cf" + }, + { + "ImportPath": "github.com/armon/go-metrics", + "Rev": "02567bbc4f518a43853d262b651a3c8257c3f141" + }, + { + "ImportPath": "github.com/armon/gomdb", + "Rev": "a8e036c4dabe7437014ecf9dbc03c6f6f0766ef8" + }, + { + "ImportPath": "github.com/hashicorp/go-syslog", + "Rev": "ac3963b72ac367e48b1e68a831e62b93fb69091c" + }, + { + "ImportPath": "github.com/hashicorp/logutils", + "Rev": "8e0820fe7ac5eb2b01626b1d99df47c5449eb2d8" + }, + { + "ImportPath": "github.com/hashicorp/memberlist", + "Rev": "e6a282556f0e8f15e9a53dcb0d14912a3c2fb141" + }, + { + "ImportPath": "github.com/hashicorp/raft", + "Rev": "35f5fa082f5a064595d84715b0cf8821f002e9ac" + }, + { + "ImportPath": "github.com/hashicorp/raft-mdb", + "Rev": "9076b4b956c1c4c8a47117608b612bda2cb5f481" + }, + { + "ImportPath": "github.com/hashicorp/serf/serf", + "Comment": "v0.6.3-1-g7f260e7", + "Rev": "7f260e70a89739bd38c1f0bf3b74c0e1c1ee617f" + }, + { + "ImportPath": "github.com/hashicorp/yamux", + "Rev": "35417c7dfab4085d7c921b33e4d5ea6cf9ceef65" + }, + { + "ImportPath": "github.com/inconshreveable/muxado", + "Rev": "f693c7e88ba316d1a0ae3e205e22a01aa3ec2848" + }, + { + "ImportPath": "github.com/miekg/dns", + "Rev": "9af5c1f8a8a71bc5c8539d16cdc40b4a47ee7024" + }, + { + "ImportPath": "github.com/mitchellh/cli", + "Rev": "eaf0e415fc517a431dca53c7b2e7559d42238ebe" + }, + { + "ImportPath": "github.com/mitchellh/mapstructure", + "Rev": "6fb2c832bcac61d01212ab1d172f7a14a8585b07" + }, + { + "ImportPath": "github.com/ryanuber/columnize", + "Comment": "v2.0.1", + "Rev": "785d943a7b6886e0bb2f139a60487b823dd8d9de" + }, + { + "ImportPath": "github.com/ugorji/go/codec", + "Rev": "71c2886f5a673a35f909803f38ece5810165097b" + } + ] +} From bf26a9160f0fc4678489046cc1492e5f2679bea2 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Tue, 22 Jul 2014 09:36:58 -0400 Subject: [PATCH 28/43] consul: Defer serf handler until initialized. Fixes #254. 
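The fix here is purely about ordering: the Serf event-handler goroutines are now started only after the corresponding Serf instance has been set up, so an event delivered during startup is no longer handled against state that is still being initialized. A tiny, self-contained Go sketch of the same idea follows; the `server` type and channel are hypothetical stand-ins, not the consul/server.go code.

```
package main

import (
	"fmt"
	"sync"
)

type server struct {
	events chan string
	wg     sync.WaitGroup
}

// eventHandler drains the event channel. If it were started before s.events
// was created it would range over a nil channel and never observe any events.
func (s *server) eventHandler() {
	defer s.wg.Done()
	for e := range s.events {
		fmt.Println("handled:", e)
	}
}

func newServer() *server {
	s := &server{}
	s.events = make(chan string, 1) // initialize the source first...
	s.wg.Add(1)
	go s.eventHandler() // ...then start the handler, mirroring the ordering in this patch
	return s
}

func main() {
	s := newServer()
	s.events <- "member-join"
	close(s.events)
	s.wg.Wait()
}
```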
--- consul/server.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/consul/server.go b/consul/server.go index 1f3825da2..af61dc94c 100644 --- a/consul/server.go +++ b/consul/server.go @@ -187,10 +187,6 @@ func NewServer(config *Config) (*Server, error) { return nil, fmt.Errorf("Failed to start Raft: %v", err) } - // Start the Serf listeners to prevent a deadlock - go s.lanEventHandler() - go s.wanEventHandler() - // Initialize the lan Serf s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false) @@ -198,6 +194,7 @@ func NewServer(config *Config) (*Server, error) { s.Shutdown() return nil, fmt.Errorf("Failed to start lan serf: %v", err) } + go s.lanEventHandler() // Initialize the wan Serf s.serfWAN, err = s.setupSerf(config.SerfWANConfig, @@ -206,6 +203,7 @@ func NewServer(config *Config) (*Server, error) { s.Shutdown() return nil, fmt.Errorf("Failed to start wan serf: %v", err) } + go s.wanEventHandler() // Start listening for RPC requests go s.listen() From 69f5dcf8dd8c0d49a28218726417258ec61ce74d Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Tue, 22 Jul 2014 12:42:24 -0400 Subject: [PATCH 29/43] Add the logo favicon to the webui --- ui/index.html | 1 + ui/static/favicon.png | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 ui/static/favicon.png diff --git a/ui/index.html b/ui/index.html index b8f35f011..3902b8b21 100644 --- a/ui/index.html +++ b/ui/index.html @@ -7,6 +7,7 @@ Consul + diff --git a/ui/static/favicon.png b/ui/static/favicon.png new file mode 100644 index 000000000..8e8418aff --- /dev/null +++ b/ui/static/favicon.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df0dc432899976f2848ffc69ca9d8c0ca51fb6e7f2e4d2b9e68c0c513e5f238e +size 3657 From 98f72c2892bfd9b6a09717309bb6cbdc952557f4 Mon Sep 17 00:00:00 2001 From: Jack Pearkes Date: Tue, 22 Jul 2014 13:31:44 -0400 Subject: [PATCH 30/43] ui: add tests param to readme --- ui/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/README.md b/ui/README.md index 068d28608..6e1fabb59 100644 --- a/ui/README.md +++ b/ui/README.md @@ -44,6 +44,8 @@ An example of this command, from inside the `ui/` directory, would be: consul agent -bootstrap -server -data-dir /tmp/ -ui-dir . +Basic tests can be run by adding the `?test` query parameter to the +application. 
### Releasing From 75e631ee947cad10a09967f64215338ed35d35e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:37:25 +0200 Subject: [PATCH 31/43] Add helper for lowercase list of strings --- consul/util.go | 9 +++++++++ consul/util_test.go | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/consul/util.go b/consul/util.go index 00815ea10..96ee5c327 100644 --- a/consul/util.go +++ b/consul/util.go @@ -9,6 +9,7 @@ import ( "path/filepath" "runtime" "strconv" + "strings" "github.com/hashicorp/serf/serf" ) @@ -68,6 +69,14 @@ func strContains(l []string, s string) bool { return false } +func ToLowerList(l []string) []string { + var out []string + for _, value := range l { + out = append(out, strings.ToLower(value)) + } + return out +} + // ensurePath is used to make sure a path exists func ensurePath(path string, dir bool) error { if !dir { diff --git a/consul/util_test.go b/consul/util_test.go index 107146b52..91b7fd2f5 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -18,6 +18,15 @@ func TestStrContains(t *testing.T) { } } +func TestToLowerList(t *testing.T) { + l := []string{"ABC", "Abc", "abc"} + for _, value := range ToLowerList(l) { + if value != "abc" { + t.Fatalf("failed lowercasing") + } + } +} + func TestIsPrivateIP(t *testing.T) { if !isPrivateIP("192.168.1.1") { t.Fatalf("bad") From f7263e8e7a9b56683c37f519b5f1a7fabc2026d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:38:01 +0200 Subject: [PATCH 32/43] Add case-insensitive flag to `MDBIndex` --- consul/mdb_table.go | 13 +++++++------ consul/state_store.go | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/consul/mdb_table.go b/consul/mdb_table.go index c4c84b0dc..53c85d7cf 100644 --- a/consul/mdb_table.go +++ b/consul/mdb_table.go @@ -45,12 +45,13 @@ type MDBTables []*MDBTable // An Index is named, and uses a series of column values to // map to the row-id containing the table type MDBIndex struct { - AllowBlank bool // Can fields be blank - Unique bool // Controls if values are unique - Fields []string // Fields are used to build the index - IdxFunc IndexFunc // Can be used to provide custom indexing - Virtual bool // Virtual index does not exist, but can be used for queries - RealIndex string // Virtual indexes use a RealIndex for iteration + AllowBlank bool // Can fields be blank + Unique bool // Controls if values are unique + Fields []string // Fields are used to build the index + IdxFunc IndexFunc // Can be used to provide custom indexing + Virtual bool // Virtual index does not exist, but can be used for queries + RealIndex string // Virtual indexes use a RealIndex for iteration + CaseInsensitive bool // Controls if values are case-insensitive table *MDBTable name string diff --git a/consul/state_store.go b/consul/state_store.go index a2f139af1..39d778850 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -179,6 +179,7 @@ func (s *StateStore) initialize() error { "id": &MDBIndex{ Unique: true, Fields: []string{"Node"}, + CaseInsensitive: true, }, }, Decoder: func(buf []byte) interface{} { From 93509f755eecd163075ce6429aafe78d63163025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 17 Jul 2014 08:38:24 +0200 Subject: [PATCH 33/43] Test DNS case-insensitivity --- command/agent/dns_test.go | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index cf1ae791f..7d664d74a 
100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -136,6 +136,40 @@ func TestDNS_NodeLookup(t *testing.T) { } } +func TestDNS_CaseInsensitiveNodeLookup(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Register node + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "Foo", + Address: "127.0.0.1", + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + m := new(dns.Msg) + m.SetQuestion("fOO.node.DC1.consul.", dns.TypeANY) + + c := new(dns.Client) + addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) + in, _, err := c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != 1 { + t.Fatalf("empty lookup: %#v", in) + } +} + func TestDNS_NodeLookup_PeriodName(t *testing.T) { dir, srv := makeDNSServer(t) defer os.RemoveAll(dir) @@ -336,6 +370,45 @@ func TestDNS_ServiceLookup(t *testing.T) { } } +func TestDNS_CaseInsensitiveServiceLookup(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Register node + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + Service: "Db", + Tags: []string{"Master"}, + Port: 12345, + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + m := new(dns.Msg) + m.SetQuestion("mASTER.dB.service.consul.", dns.TypeSRV) + + c := new(dns.Client) + addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) + in, _, err := c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != 1 { + t.Fatalf("empty lookup: %#v", in) + } +} + func TestDNS_ServiceLookup_TagPeriod(t *testing.T) { dir, srv := makeDNSServer(t) defer os.RemoveAll(dir) From eab2cbc1efe9995608ccddc6dd6aa1765e253737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:28:54 +0200 Subject: [PATCH 34/43] Always lowercase incoming DNS query --- command/agent/dns.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/dns.go b/command/agent/dns.go index 522376e15..b83243720 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -248,7 +248,7 @@ func (d *DNSServer) dispatch(network string, req, resp *dns.Msg) { datacenter := d.agent.config.Datacenter // Get the QName without the domain suffix - qName := dns.Fqdn(req.Question[0].Name) + qName := strings.ToLower(dns.Fqdn(req.Question[0].Name)) qName = strings.TrimSuffix(qName, d.domain) // Split into the label parts From d8c772efb8a244d6cec33498d89af83316d9d6d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:29:28 +0200 Subject: [PATCH 35/43] Remove DC case-insensitive check from node test --- command/agent/dns_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index 7d664d74a..7d4226614 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -156,7 +156,7 @@ func TestDNS_CaseInsensitiveNodeLookup(t *testing.T) { } m := new(dns.Msg) - m.SetQuestion("fOO.node.DC1.consul.", dns.TypeANY) + m.SetQuestion("fOO.node.dc1.consul.", dns.TypeANY) c := new(dns.Client) 
addr, _ := srv.agent.config.ClientListener(srv.agent.config.Ports.DNS) From ff93acda288c1189f88f8b815eb45058072b03e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:30:12 +0200 Subject: [PATCH 36/43] Lowercase index key and lookup value if flag is set --- consul/mdb_table.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/consul/mdb_table.go b/consul/mdb_table.go index 53c85d7cf..592bce849 100644 --- a/consul/mdb_table.go +++ b/consul/mdb_table.go @@ -427,6 +427,10 @@ func (t *MDBTable) getIndex(index string, parts []string) (*MDBIndex, []byte, er return nil, nil, tooManyFields } + if idx.CaseInsensitive { + parts = ToLowerList(parts) + } + // Construct the key key := idx.keyFromParts(parts...) return idx, key, nil @@ -614,6 +618,9 @@ func (i *MDBIndex) keyFromObject(obj interface{}) ([]byte, error) { if !i.AllowBlank && val == "" { return nil, fmt.Errorf("Field '%s' must be set: %#v", field, obj) } + if i.CaseInsensitive { + val = strings.ToLower(val) + } parts = append(parts, val) } key := i.keyFromParts(parts...) From 2727c158a6a375e1b66ad2a6fa73071795fafa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:33:27 +0200 Subject: [PATCH 37/43] Make service index case-insensitive --- consul/state_store.go | 1 + 1 file changed, 1 insertion(+) diff --git a/consul/state_store.go b/consul/state_store.go index 39d778850..ed823eb2e 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -201,6 +201,7 @@ func (s *StateStore) initialize() error { "service": &MDBIndex{ AllowBlank: true, Fields: []string{"ServiceName"}, + CaseInsensitive: true, }, }, Decoder: func(buf []byte) interface{} { From 9dc67edf7fbb5d5314bea2966c7508f00805d5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:33:47 +0200 Subject: [PATCH 38/43] Make service tag filter case-insensitive --- consul/state_store.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/consul/state_store.go b/consul/state_store.go index ed823eb2e..043a87103 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -642,7 +642,8 @@ func serviceTagFilter(l []interface{}, tag string) []interface{} { n := len(l) for i := 0; i < n; i++ { srv := l[i].(*structs.ServiceNode) - if !strContains(srv.ServiceTags, tag) { + srv.ServiceTags = ToLowerList(srv.ServiceTags) + if !strContains(srv.ServiceTags, strings.ToLower(tag)) { l[i], l[n-1] = l[n-1], nil i-- n-- From 31037338a34f780251bb6a0233719b674ea8ad29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 10:34:03 +0200 Subject: [PATCH 39/43] Change order of fixtures --- consul/catalog_endpoint_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/consul/catalog_endpoint_test.go b/consul/catalog_endpoint_test.go index f9721ca09..06066b7db 100644 --- a/consul/catalog_endpoint_test.go +++ b/consul/catalog_endpoint_test.go @@ -220,13 +220,13 @@ func TestCatalogListNodes(t *testing.T) { }) // Server node is auto added from Serf - if out.Nodes[0].Node != s1.config.NodeName { + if out.Nodes[1].Node != s1.config.NodeName { t.Fatalf("bad: %v", out) } - if out.Nodes[1].Node != "foo" { + if out.Nodes[0].Node != "foo" { t.Fatalf("bad: %v", out) } - if out.Nodes[1].Address != "127.0.0.1" { + if out.Nodes[0].Address != "127.0.0.1" { t.Fatalf("bad: %v", out) } } From 78a69b61a35e7f705def2d1bda52ec0e674c3aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 
2014 15:11:45 +0200 Subject: [PATCH 40/43] Don't override `ServiceTags` --- consul/state_store.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/consul/state_store.go b/consul/state_store.go index 043a87103..12e1c6c45 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -642,8 +642,7 @@ func serviceTagFilter(l []interface{}, tag string) []interface{} { n := len(l) for i := 0; i < n; i++ { srv := l[i].(*structs.ServiceNode) - srv.ServiceTags = ToLowerList(srv.ServiceTags) - if !strContains(srv.ServiceTags, strings.ToLower(tag)) { + if !strContains(ToLowerList(srv.ServiceTags), strings.ToLower(tag)) { l[i], l[n-1] = l[n-1], nil i-- n-- From 29fdfb0f43f0a1db065249573b62126fc56886ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 23:39:13 +0200 Subject: [PATCH 41/43] Short doc note about DNS case-insensitivity --- website/source/docs/agent/dns.html.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website/source/docs/agent/dns.html.markdown b/website/source/docs/agent/dns.html.markdown index 46303a60d..938c989a6 100644 --- a/website/source/docs/agent/dns.html.markdown +++ b/website/source/docs/agent/dns.html.markdown @@ -20,7 +20,8 @@ with no failing health checks. It's that simple! There are a number of [configuration options](/docs/agent/options.html) that are important for the DNS interface. They are `client_addr`, `ports.dns`, `recursor`, `domain`, and `dns_config`. By default Consul will listen on 127.0.0.1:8600 for DNS queries -in the "consul." domain, without support for DNS recursion. +in the "consul." domain, without support for DNS recursion. All queries are case-insensitive: a +name lookup for `PostgreSQL.node.dc1.consul` will find all nodes named `postgresql`, regardless of case. There are a few ways to use the DNS interface. One option is to use a custom DNS resolver library and point it at Consul. Another option is to set Consul From 2a58c29ed246747675ffb82efed3bb3aeb8708dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Wed, 23 Jul 2014 23:45:03 +0200 Subject: [PATCH 42/43] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2954ce23..97f87722e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.3.2 (Unreleased) + +IMPROVEMENTS: + + * DNS case-insensitivity [GH-189] + ## 0.3.1 (July 21, 2014) FEATURES: From 90816cca98ed2875733cc8db7f6164c42c6a765b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?William=20Tisa=CC=88ter?= Date: Thu, 24 Jul 2014 01:09:55 +0200 Subject: [PATCH 43/43] Run `go fmt` --- consul/state_store.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/consul/state_store.go b/consul/state_store.go index 12e1c6c45..f95b0554e 100644 --- a/consul/state_store.go +++ b/consul/state_store.go @@ -177,8 +177,8 @@ func (s *StateStore) initialize() error { Name: dbNodes, Indexes: map[string]*MDBIndex{ "id": &MDBIndex{ - Unique: true, - Fields: []string{"Node"}, + Unique: true, + Fields: []string{"Node"}, CaseInsensitive: true, }, }, @@ -199,8 +199,8 @@ func (s *StateStore) initialize() error { Fields: []string{"Node", "ServiceID"}, }, "service": &MDBIndex{ - AllowBlank: true, - Fields: []string{"ServiceName"}, + AllowBlank: true, + Fields: []string{"ServiceName"}, CaseInsensitive: true, }, },
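Taken together, the case-insensitivity patches above all follow the same approach: fold both the stored values and the incoming query to lower case before comparing. Below is a stand-alone Go sketch of that idea; it only mirrors the shape of the `ToLowerList` helper and the tag filter, and is not the Consul source itself.

```
package main

import (
	"fmt"
	"strings"
)

// toLowerList lowercases every entry, the same idea as the ToLowerList helper
// added in consul/util.go.
func toLowerList(l []string) []string {
	out := make([]string, 0, len(l))
	for _, v := range l {
		out = append(out, strings.ToLower(v))
	}
	return out
}

// containsFold reports whether s is present in l, ignoring case, which is the
// comparison used when filtering services by tag.
func containsFold(l []string, s string) bool {
	s = strings.ToLower(s)
	for _, v := range toLowerList(l) {
		if v == s {
			return true
		}
	}
	return false
}

func main() {
	tags := []string{"Master", "v1"}
	fmt.Println(containsFold(tags, "mASTER")) // true: tag matching ignores case
	fmt.Println(containsFold(tags, "slave"))  // false: no such tag
}
```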