Merge pull request #1331 from slackpad/f-network-tomography
Adds network tomography features to Consul.
Commit f6c774bec3
@@ -44,6 +44,12 @@ type QueryOptions struct {
	// Token is used to provide a per-request ACL token
	// which overrides the agent's default token.
	Token string

	// Near is used to provide a node name that will sort the results
	// in ascending order based on the estimated round trip time from
	// that node. Setting this to "_agent" will use the agent's node
	// for the sort.
	Near string
}

// WriteOptions are used to parameterize a write
@@ -250,6 +256,9 @@ func (r *request) setQueryOptions(q *QueryOptions) {
	if q.Token != "" {
		r.params.Set("token", q.Token)
	}
	if q.Near != "" {
		r.params.Set("near", q.Near)
	}
}

// durToMsec converts a duration to a millisecond specified string
@@ -127,6 +127,7 @@ func TestSetQueryOptions(t *testing.T) {
		WaitIndex: 1000,
		WaitTime:  100 * time.Second,
		Token:     "12345",
		Near:      "nodex",
	}
	r.setQueryOptions(q)

@@ -148,6 +149,9 @@ func TestSetQueryOptions(t *testing.T) {
	if r.params.Get("token") != "12345" {
		t.Fatalf("bad: %v", r.params)
	}
	if r.params.Get("near") != "nodex" {
		t.Fatalf("bad: %v", r.params)
	}
}

func TestSetWriteOptions(t *testing.T) {
api/coordinate.go (new file, 66 lines)
@@ -0,0 +1,66 @@
package api

import (
	"github.com/hashicorp/serf/coordinate"
)

// CoordinateEntry represents a node and its associated network coordinate.
type CoordinateEntry struct {
	Node  string
	Coord *coordinate.Coordinate
}

// CoordinateDatacenterMap represents a datacenter and its associated WAN
// nodes and their associated coordinates.
type CoordinateDatacenterMap struct {
	Datacenter  string
	Coordinates []CoordinateEntry
}

// Coordinate can be used to query the coordinate endpoints
type Coordinate struct {
	c *Client
}

// Coordinate returns a handle to the coordinate endpoints
func (c *Client) Coordinate() *Coordinate {
	return &Coordinate{c}
}

// Datacenters is used to return the coordinates of all the servers in the WAN
// pool.
func (c *Coordinate) Datacenters() ([]*CoordinateDatacenterMap, error) {
	r := c.c.newRequest("GET", "/v1/coordinate/datacenters")
	_, resp, err := requireOK(c.c.doRequest(r))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var out []*CoordinateDatacenterMap
	if err := decodeBody(resp, &out); err != nil {
		return nil, err
	}
	return out, nil
}

// Nodes is used to return the coordinates of all the nodes in the LAN pool.
func (c *Coordinate) Nodes(q *QueryOptions) ([]*CoordinateEntry, *QueryMeta, error) {
	r := c.c.newRequest("GET", "/v1/coordinate/nodes")
	r.setQueryOptions(q)
	rtt, resp, err := requireOK(c.c.doRequest(r))
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	qm := &QueryMeta{}
	parseQueryMeta(resp, qm)
	qm.RequestTime = rtt

	var out []*CoordinateEntry
	if err := decodeBody(resp, &out); err != nil {
		return nil, nil, err
	}
	return out, qm, nil
}
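As a quick illustration of this new client API, here is a minimal sketch (not part of the commit) that pulls the LAN coordinates and estimates the RTT between two nodes; the node names "node1" and "node2" are placeholders, and DistanceTo comes from the serf coordinate package imported above.

	package main

	import (
		"fmt"
		"log"

		"github.com/hashicorp/consul/api"
		"github.com/hashicorp/serf/coordinate"
	)

	func main() {
		client, err := api.NewClient(api.DefaultConfig())
		if err != nil {
			log.Fatal(err)
		}

		// Pull the LAN coordinates for every node in the local datacenter.
		entries, _, err := client.Coordinate().Nodes(nil)
		if err != nil {
			log.Fatal(err)
		}

		// Index the coordinates by node name.
		coords := make(map[string]*coordinate.Coordinate)
		for _, entry := range entries {
			coords[entry.Node] = entry.Coord
		}

		// Estimate the round trip time between two placeholder nodes.
		if c1, c2 := coords["node1"], coords["node2"]; c1 != nil && c2 != nil {
			fmt.Printf("estimated rtt: %.3f ms\n", c1.DistanceTo(c2).Seconds()*1000.0)
		}
	}

This is essentially what the new `consul rtt` command further down in this diff does for the LAN case.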
api/coordinate_test.go (new file, 54 lines)
@@ -0,0 +1,54 @@
package api

import (
	"fmt"
	"testing"

	"github.com/hashicorp/consul/testutil"
)

func TestCoordinate_Datacenters(t *testing.T) {
	t.Parallel()
	c, s := makeClient(t)
	defer s.Stop()

	coordinate := c.Coordinate()

	testutil.WaitForResult(func() (bool, error) {
		datacenters, err := coordinate.Datacenters()
		if err != nil {
			return false, err
		}

		if len(datacenters) == 0 {
			return false, fmt.Errorf("Bad: %v", datacenters)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}

func TestCoordinate_Nodes(t *testing.T) {
	t.Parallel()
	c, s := makeClient(t)
	defer s.Stop()

	coordinate := c.Coordinate()

	testutil.WaitForResult(func() (bool, error) {
		_, _, err := coordinate.Nodes(nil)
		if err != nil {
			return false, err
		}

		// There's not a good way to populate coordinates without
		// waiting for them to calculate and update, so the best
		// we can do is call the endpoint and make sure we don't
		// get an error.
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}
@@ -17,6 +17,7 @@ import (
	"github.com/hashicorp/consul/consul"
	"github.com/hashicorp/consul/consul/state"
	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
	"github.com/hashicorp/serf/serf"
)

@@ -191,6 +192,11 @@ func Create(config *Config, logOutput io.Writer) (*Agent, error) {
	// Start handling events
	go agent.handleEvents()

	// Start sending network coordinates to the server.
	if !config.DisableCoordinates {
		go agent.sendCoordinate()
	}

	// Write out the PID file if necessary
	err = agent.storePid()
	if err != nil {
@@ -539,6 +545,22 @@ func (a *Agent) WANMembers() []serf.Member {
	}
}

// CanServersUnderstandProtocol checks to see if all the servers understand the
// given protocol version.
func (a *Agent) CanServersUnderstandProtocol(version uint8) bool {
	numServers, numWhoGrok := 0, 0
	members := a.LANMembers()
	for _, member := range members {
		if member.Tags["role"] == "consul" {
			numServers++
			if member.ProtocolMax >= version {
				numWhoGrok++
			}
		}
	}
	return (numServers > 0) && (numWhoGrok == numServers)
}

// StartSync is called once Services and Checks are registered.
// This is called to prevent a race between clients and the anti-entropy routines
func (a *Agent) StartSync() {
@@ -556,6 +578,58 @@ func (a *Agent) ResumeSync() {
	a.state.Resume()
}

// Returns the coordinate of this node in the local pool (assumes coordinates
// are enabled, so check that before calling).
func (a *Agent) GetCoordinate() (*coordinate.Coordinate, error) {
	if a.config.Server {
		return a.server.GetLANCoordinate()
	} else {
		return a.client.GetCoordinate()
	}
}

// sendCoordinate is a long-running loop that periodically sends our coordinate
// to the server. Closing the agent's shutdownChannel will cause this to exit.
func (a *Agent) sendCoordinate() {
	for {
		rate := a.config.SyncCoordinateRateTarget
		min := a.config.SyncCoordinateIntervalMin
		intv := rateScaledInterval(rate, min, len(a.LANMembers()))
		intv = intv + randomStagger(intv)

		select {
		case <-time.After(intv):
			if !a.CanServersUnderstandProtocol(3) {
				continue
			}

			var c *coordinate.Coordinate
			var err error
			if c, err = a.GetCoordinate(); err != nil {
				a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err)
				continue
			}

			// TODO - Consider adding a distance check so we don't send
			// an update if the position hasn't changed by more than a
			// threshold.
			req := structs.CoordinateUpdateRequest{
				Datacenter:   a.config.Datacenter,
				Node:         a.config.NodeName,
				Coord:        c,
				WriteRequest: structs.WriteRequest{Token: a.config.ACLToken},
			}
			var reply struct{}
			if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
				a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
				continue
			}
		case <-a.shutdownCh:
			return
		}
	}
}

// persistService saves a service definition to a JSON file in the data dir
func (a *Agent) persistService(service *structs.NodeService) error {
	svcPath := filepath.Join(a.config.DataDir, servicesDir, stringHash(service.ID))
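The interval math in sendCoordinate is worth spelling out: each agent scales its own send interval by the LAN member count so that the servers see roughly SyncCoordinateRateTarget updates per second in aggregate, then adds a random stagger of up to one interval to avoid synchronized bursts. A self-contained sketch (not part of the commit, reusing the rateScaledInterval formula from util.go below) with the defaults from DefaultConfig (64 updates/sec target, 15s minimum):

	package main

	import (
		"fmt"
		"time"
	)

	// Same formula as rateScaledInterval in util.go: if each of n agents sends
	// once per interval, the servers see about `rate` updates per second total.
	func rateScaledInterval(rate float64, min time.Duration, n int) time.Duration {
		interval := time.Duration(float64(time.Second) * float64(n) / rate)
		if interval < min {
			return min
		}
		return interval
	}

	func main() {
		rate, min := 64.0, 15*time.Second // defaults from DefaultConfig()
		for _, n := range []int{100, 960, 6400} {
			// sendCoordinate adds randomStagger(intv) on top, i.e. up to
			// one extra interval of jitter.
			fmt.Printf("n=%5d  base interval=%v\n", n, rateScaledInterval(rate, min, n))
		}
	}

This prints 15s for 100 and 960 nodes (the floor applies up to 64*15 = 960 nodes) and 1m40s for 6400 nodes.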
@@ -3,6 +3,7 @@ package agent
import (
	"fmt"
	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
	"github.com/hashicorp/serf/serf"
	"net/http"
	"strconv"

@@ -11,12 +12,22 @@

type AgentSelf struct {
	Config *Config
	Coord  *coordinate.Coordinate
	Member serf.Member
}

func (s *HTTPServer) AgentSelf(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	var c *coordinate.Coordinate
	if !s.agent.config.DisableCoordinates {
		var err error
		if c, err = s.agent.GetCoordinate(); err != nil {
			return nil, err
		}
	}

	return AgentSelf{
		Config: s.agent.config,
		Coord:  c,
		Member: s.agent.LocalMember(),
	}, nil
}
@@ -6,6 +6,7 @@ import (
	"net/http"
	"net/http/httptest"
	"os"
	"reflect"
	"testing"
	"time"

@@ -81,7 +82,7 @@ func TestHTTPAgentSelf(t *testing.T) {

	obj, err := srv.AgentSelf(nil, req)
	if err != nil {
-		t.Fatalf("Err: %v", err)
+		t.Fatalf("err: %v", err)
	}

	val := obj.(AgentSelf)

@@ -92,6 +93,24 @@ func TestHTTPAgentSelf(t *testing.T) {
	if int(val.Config.Ports.SerfLan) != srv.agent.config.Ports.SerfLan {
		t.Fatalf("incorrect port: %v", obj)
	}

	c, err := srv.agent.server.GetLANCoordinate()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !reflect.DeepEqual(c, val.Coord) {
		t.Fatalf("coordinates are not equal: %v != %v", c, val.Coord)
	}

	srv.agent.config.DisableCoordinates = true
	obj, err = srv.AgentSelf(nil, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	val = obj.(AgentSelf)
	if val.Coord != nil {
		t.Fatalf("should have been nil: %v", val.Coord)
	}
}

func TestHTTPAgentMembers(t *testing.T) {
@@ -59,6 +59,8 @@ func nextConfig() *Config {
	cons.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
	cons.RaftConfig.ElectionTimeout = 40 * time.Millisecond

	cons.DisableCoordinates = false
	cons.CoordinateUpdatePeriod = 100 * time.Millisecond
	return conf
}
@@ -1579,3 +1581,51 @@ func TestAgent_purgeCheckState(t *testing.T) {
		t.Fatalf("should have removed file")
	}
}

func TestAgent_GetCoordinate(t *testing.T) {
	check := func(server bool) {
		config := nextConfig()
		config.Server = server
		dir, agent := makeAgent(t, config)
		defer os.RemoveAll(dir)
		defer agent.Shutdown()

		// This doesn't verify the returned coordinate, but it makes
		// sure that the agent chooses the correct Serf instance,
		// depending on how it's configured as a client or a server.
		// If it chooses the wrong one, this will crash.
		if _, err := agent.GetCoordinate(); err != nil {
			t.Fatalf("err: %s", err)
		}
	}

	check(true)
	check(false)
}

func TestAgent_CanServersUnderstandProtocol(t *testing.T) {
	config := nextConfig()
	dir, agent := makeAgent(t, config)
	defer os.RemoveAll(dir)
	defer agent.Shutdown()

	min := uint8(consul.ProtocolVersionMin)
	if !agent.CanServersUnderstandProtocol(min) {
		t.Fatalf("should grok %d", min)
	}

	max := uint8(consul.ProtocolVersionMax)
	if !agent.CanServersUnderstandProtocol(max) {
		t.Fatalf("should grok %d", max)
	}

	current := uint8(config.Protocol)
	if !agent.CanServersUnderstandProtocol(current) {
		t.Fatalf("should grok %d", current)
	}

	future := max + 1
	if agent.CanServersUnderstandProtocol(future) {
		t.Fatalf("should not grok %d", future)
	}
}
@@ -60,6 +60,7 @@ func (s *HTTPServer) CatalogDatacenters(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
func (s *HTTPServer) CatalogNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Setup the request
	args := structs.DCSpecificRequest{}
	s.parseSource(req, &args.Source)
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}

@@ -90,6 +91,7 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Set default DC
	args := structs.ServiceSpecificRequest{}
	s.parseSource(req, &args.Source)
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}
@@ -2,13 +2,15 @@ package agent

import (
	"fmt"
-	"github.com/hashicorp/consul/consul/structs"
-	"github.com/hashicorp/consul/testutil"
	"net/http"
	"net/http/httptest"
	"os"
	"testing"
	"time"
+
+	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/consul/testutil"
+	"github.com/hashicorp/serf/coordinate"
)

func TestCatalogRegister(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestCatalogNodes_DistanceSort(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
|
||||
|
||||
// Register nodes.
|
||||
args := &structs.RegisterRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "foo",
|
||||
Address: "127.0.0.1",
|
||||
}
|
||||
var out struct{}
|
||||
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
args = &structs.RegisterRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "bar",
|
||||
Address: "127.0.0.2",
|
||||
}
|
||||
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Nobody has coordinates set so this will still return them in the
|
||||
// order they are indexed.
|
||||
req, err := http.NewRequest("GET", "/v1/catalog/nodes?dc=dc1&near=foo", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.CatalogNodes(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
assertIndex(t, resp)
|
||||
nodes := obj.(structs.Nodes)
|
||||
if len(nodes) != 3 {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if nodes[0].Node != "bar" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[1].Node != "foo" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[2].Node != srv.agent.config.NodeName {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
|
||||
// Send an update for the node and wait for it to get applied.
|
||||
arg := structs.CoordinateUpdateRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "foo",
|
||||
Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()),
|
||||
}
|
||||
if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
// Query again and now foo should have moved to the front of the line.
|
||||
req, err = http.NewRequest("GET", "/v1/catalog/nodes?dc=dc1&near=foo", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
resp = httptest.NewRecorder()
|
||||
obj, err = srv.CatalogNodes(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
assertIndex(t, resp)
|
||||
nodes = obj.(structs.Nodes)
|
||||
if len(nodes) != 3 {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if nodes[0].Node != "foo" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[1].Node != "bar" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[2].Node != srv.agent.config.NodeName {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCatalogServices(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
|
@ -289,6 +386,108 @@ func TestCatalogServiceNodes(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestCatalogServiceNodes_DistanceSort(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
testutil.WaitForLeader(t, srv.agent.RPC, "dc1")
|
||||
|
||||
// Register nodes.
|
||||
args := &structs.RegisterRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "bar",
|
||||
Address: "127.0.0.1",
|
||||
Service: &structs.NodeService{
|
||||
Service: "api",
|
||||
Tags: []string{"a"},
|
||||
},
|
||||
}
|
||||
var out struct{}
|
||||
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", "/v1/catalog/service/api?tag=a", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
args = &structs.RegisterRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "foo",
|
||||
Address: "127.0.0.2",
|
||||
Service: &structs.NodeService{
|
||||
Service: "api",
|
||||
Tags: []string{"a"},
|
||||
},
|
||||
}
|
||||
if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Nobody has coordinates set so this will still return them in the
|
||||
// order they are indexed.
|
||||
req, err = http.NewRequest("GET", "/v1/catalog/service/api?tag=a&near=foo", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
obj, err := srv.CatalogServiceNodes(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
assertIndex(t, resp)
|
||||
nodes := obj.(structs.ServiceNodes)
|
||||
if len(nodes) != 2 {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if nodes[0].Node != "bar" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[1].Node != "foo" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
|
||||
// Send an update for the node and wait for it to get applied.
|
||||
arg := structs.CoordinateUpdateRequest{
|
||||
Datacenter: "dc1",
|
||||
Node: "foo",
|
||||
Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()),
|
||||
}
|
||||
if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
// Query again and now foo should have moved to the front of the line.
|
||||
req, err = http.NewRequest("GET", "/v1/catalog/service/api?tag=a&near=foo", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
resp = httptest.NewRecorder()
|
||||
obj, err = srv.CatalogServiceNodes(resp, req)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
assertIndex(t, resp)
|
||||
nodes = obj.(structs.ServiceNodes)
|
||||
if len(nodes) != 2 {
|
||||
t.Fatalf("bad: %v", obj)
|
||||
}
|
||||
if nodes[0].Node != "foo" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
if nodes[1].Node != "bar" {
|
||||
t.Fatalf("bad: %v", nodes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCatalogNodeServices(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
|
|
|
@@ -368,10 +368,25 @@ type Config struct {
	AtlasEndpoint string `mapstructure:"atlas_endpoint"`

	// AEInterval controls the anti-entropy interval. This is how often
-	// the agent attempts to reconcile it's local state with the server'
+	// the agent attempts to reconcile its local state with the server's
	// representation of our state. Defaults to every 60s.
	AEInterval time.Duration `mapstructure:"-" json:"-"`

	// DisableCoordinates controls features related to network coordinates.
	DisableCoordinates bool `mapstructure:"disable_coordinates"`

	// SyncCoordinateRateTarget controls the rate for sending network
	// coordinates to the server, in updates per second. This is the max rate
	// that the server supports, so we scale our interval based on the size
	// of the cluster to try to achieve this in aggregate at the server.
	SyncCoordinateRateTarget float64 `mapstructure:"-" json:"-"`

	// SyncCoordinateIntervalMin sets the minimum interval that coordinates
	// will be sent to the server. We scale the interval based on the cluster
	// size, but below a certain interval it doesn't make sense to send them
	// any faster.
	SyncCoordinateIntervalMin time.Duration `mapstructure:"-" json:"-"`

	// Checks holds the provided check definitions
	Checks []*CheckDefinition `mapstructure:"-" json:"-"`

@@ -463,9 +478,18 @@ func DefaultConfig() *Config {
		},
		StatsitePrefix:      "consul",
		SyslogFacility:      "LOCAL0",
-		Protocol:            consul.ProtocolVersionMax,
+		Protocol:            consul.ProtocolVersion2Compatible,
		CheckUpdateInterval: 5 * time.Minute,
		AEInterval:          time.Minute,
		DisableCoordinates:  false,

		// SyncCoordinateRateTarget is set based on the rate that we want
		// the server to handle as an aggregate across the entire cluster.
		// If you update this, you'll need to adjust CoordinateUpdate* in
		// the server-side config accordingly.
		SyncCoordinateRateTarget:  64.0, // updates / second
		SyncCoordinateIntervalMin: 15 * time.Second,

		ACLTTL:           30 * time.Second,
		ACLDownPolicy:    "extend-cache",
		ACLDefaultPolicy: "allow",

@@ -1063,6 +1087,9 @@ func MergeConfig(a, b *Config) *Config {
	if b.AtlasEndpoint != "" {
		result.AtlasEndpoint = b.AtlasEndpoint
	}
	if b.DisableCoordinates {
		result.DisableCoordinates = true
	}
	if b.SessionTTLMinRaw != "" {
		result.SessionTTLMin = b.SessionTTLMin
		result.SessionTTLMinRaw = b.SessionTTLMinRaw
|
|||
t.Fatalf("bad: %#v", config)
|
||||
}
|
||||
|
||||
// Coordinate disable
|
||||
input = `{"disable_coordinates": true}`
|
||||
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
if config.DisableCoordinates != true {
|
||||
t.Fatalf("bad: coordinates not disabled: %#v", config)
|
||||
}
|
||||
|
||||
// SessionTTLMin
|
||||
input = `{"session_ttl_min": "5s"}`
|
||||
config, err = DecodeConfig(bytes.NewReader([]byte(input)))
|
||||
|
|
command/agent/coordinate_endpoint.go (new file, 66 lines)
@@ -0,0 +1,66 @@
package agent

import (
	"github.com/hashicorp/consul/consul/structs"
	"net/http"
	"sort"
)

// coordinateDisabled handles all the endpoints when coordinates are not enabled,
// returning an error message.
func coordinateDisabled(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	resp.WriteHeader(401)
	resp.Write([]byte("Coordinate support disabled"))
	return nil, nil
}

// sorter wraps a coordinate list and implements the sort.Interface to sort by
// node name.
type sorter struct {
	coordinates structs.Coordinates
}

// See sort.Interface.
func (s *sorter) Len() int {
	return len(s.coordinates)
}

// See sort.Interface.
func (s *sorter) Swap(i, j int) {
	s.coordinates[i], s.coordinates[j] = s.coordinates[j], s.coordinates[i]
}

// See sort.Interface.
func (s *sorter) Less(i, j int) bool {
	return s.coordinates[i].Node < s.coordinates[j].Node
}

// CoordinateDatacenters returns the WAN nodes in each datacenter, along with
// raw network coordinates.
func (s *HTTPServer) CoordinateDatacenters(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	var out []structs.DatacenterMap
	if err := s.agent.RPC("Coordinate.ListDatacenters", struct{}{}, &out); err != nil {
		return nil, err
	}
	for i := range out {
		sort.Sort(&sorter{out[i].Coordinates})
	}
	return out, nil
}

// CoordinateNodes returns the LAN nodes in the given datacenter, along with
// raw network coordinates.
func (s *HTTPServer) CoordinateNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	args := structs.DCSpecificRequest{}
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}

	var out structs.IndexedCoordinates
	defer setMeta(resp, &out.QueryMeta)
	if err := s.agent.RPC("Coordinate.ListNodes", &args, &out); err != nil {
		return nil, err
	}
	sort.Sort(&sorter{out.Coordinates})
	return out.Coordinates, nil
}
command/agent/coordinate_endpoint_test.go (new file, 105 lines)
@@ -0,0 +1,105 @@
package agent

import (
	"net/http"
	"net/http/httptest"
	"os"
	"testing"
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/consul/testutil"
	"github.com/hashicorp/serf/coordinate"
)

func TestCoordinate_Datacenters(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	testutil.WaitForLeader(t, srv.agent.RPC, "dc1")

	req, err := http.NewRequest("GET", "/v1/coordinate/datacenters", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := httptest.NewRecorder()
	obj, err := srv.CoordinateDatacenters(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	maps := obj.([]structs.DatacenterMap)
	if len(maps) != 1 ||
		maps[0].Datacenter != "dc1" ||
		len(maps[0].Coordinates) != 1 ||
		maps[0].Coordinates[0].Node != srv.agent.config.NodeName {
		t.Fatalf("bad: %v", maps)
	}
}

func TestCoordinate_Nodes(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	testutil.WaitForLeader(t, srv.agent.RPC, "dc1")

	// Register the nodes.
	nodes := []string{"foo", "bar"}
	for _, node := range nodes {
		req := structs.RegisterRequest{
			Datacenter: "dc1",
			Node:       node,
			Address:    "127.0.0.1",
		}
		var reply struct{}
		if err := srv.agent.RPC("Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %s", err)
		}
	}

	// Send some coordinates for a few nodes, waiting a little while for the
	// batch update to run.
	arg1 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Coord:      coordinate.NewCoordinate(coordinate.DefaultConfig()),
	}
	var out struct{}
	if err := srv.agent.RPC("Coordinate.Update", &arg1, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg2 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "bar",
		Coord:      coordinate.NewCoordinate(coordinate.DefaultConfig()),
	}
	if err := srv.agent.RPC("Coordinate.Update", &arg2, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	time.Sleep(200 * time.Millisecond)

	// Query back and check the nodes are present and sorted correctly.
	req, err := http.NewRequest("GET", "/v1/coordinate/nodes?dc=dc1", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := httptest.NewRecorder()
	obj, err := srv.CoordinateNodes(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	coordinates := obj.(structs.Coordinates)
	if len(coordinates) != 2 ||
		coordinates[0].Node != "bar" ||
		coordinates[1].Node != "foo" {
		t.Fatalf("bad: %v", coordinates)
	}
}
@@ -9,6 +9,7 @@
func (s *HTTPServer) HealthChecksInState(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Set default DC
	args := structs.ChecksInStateRequest{}
	s.parseSource(req, &args.Source)
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}

@@ -57,6 +58,7 @@ func (s *HTTPServer) HealthNodeChecks(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Set default DC
	args := structs.ServiceSpecificRequest{}
	s.parseSource(req, &args.Source)
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}

@@ -81,6 +83,7 @@ func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	// Set default DC
	args := structs.ServiceSpecificRequest{}
	s.parseSource(req, &args.Source)
	if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
		return nil, nil
	}
@@ -2,13 +2,16 @@ package agent

import (
	"fmt"
-	"github.com/hashicorp/consul/consul/structs"
-	"github.com/hashicorp/consul/testutil"
	"net/http"
	"net/http/httptest"
	"os"
+	"reflect"
	"testing"
	"time"
+
+	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/consul/testutil"
+	"github.com/hashicorp/serf/coordinate"
)

func TestHealthChecksInState(t *testing.T) {
@@ -38,6 +41,87 @@ func TestHealthChecksInState(t *testing.T) {
	})
}

func TestHealthChecksInState_DistanceSort(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	testutil.WaitForLeader(t, srv.agent.RPC, "dc1")

	args := &structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "bar",
		Address:    "127.0.0.1",
		Check: &structs.HealthCheck{
			Node:   "bar",
			Name:   "node check",
			Status: structs.HealthCritical,
		},
	}

	var out struct{}
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	args.Node, args.Check.Node = "foo", "foo"
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	req, err := http.NewRequest("GET", "/v1/health/state/critical?dc=dc1&near=foo", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := httptest.NewRecorder()
	obj, err := srv.HealthChecksInState(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes := obj.(structs.HealthChecks)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[0].Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}

	// Send an update for the node and wait for it to get applied.
	arg := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Coord:      coordinate.NewCoordinate(coordinate.DefaultConfig()),
	}
	if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	time.Sleep(200 * time.Millisecond)

	// Query again and now foo should have moved to the front of the line.
	resp = httptest.NewRecorder()
	obj, err = srv.HealthChecksInState(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes = obj.(structs.HealthChecks)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[0].Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
}

func TestHealthNodeChecks(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)

@@ -110,6 +194,92 @@ func TestHealthServiceChecks(t *testing.T) {
	}
}

func TestHealthServiceChecks_DistanceSort(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	testutil.WaitForLeader(t, srv.agent.RPC, "dc1")

	// Create a service check
	args := &structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "bar",
		Address:    "127.0.0.1",
		Service: &structs.NodeService{
			ID:      "test",
			Service: "test",
		},
		Check: &structs.HealthCheck{
			Node:      "bar",
			Name:      "test check",
			ServiceID: "test",
		},
	}

	var out struct{}
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	args.Node, args.Check.Node = "foo", "foo"
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	req, err := http.NewRequest("GET", "/v1/health/checks/test?dc=dc1&near=foo", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := httptest.NewRecorder()
	obj, err := srv.HealthServiceChecks(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes := obj.(structs.HealthChecks)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", obj)
	}
	if nodes[0].Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}

	// Send an update for the node and wait for it to get applied.
	arg := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Coord:      coordinate.NewCoordinate(coordinate.DefaultConfig()),
	}
	if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	time.Sleep(200 * time.Millisecond)

	// Query again and now foo should have moved to the front of the line.
	resp = httptest.NewRecorder()
	obj, err = srv.HealthServiceChecks(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes = obj.(structs.HealthChecks)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", obj)
	}
	if nodes[0].Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
}

func TestHealthServiceNodes(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)

@@ -138,6 +308,92 @@ func TestHealthServiceNodes(t *testing.T) {
	}
}

func TestHealthServiceNodes_DistanceSort(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	testutil.WaitForLeader(t, srv.agent.RPC, "dc1")

	// Create a service check
	args := &structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "bar",
		Address:    "127.0.0.1",
		Service: &structs.NodeService{
			ID:      "test",
			Service: "test",
		},
		Check: &structs.HealthCheck{
			Node:      "bar",
			Name:      "test check",
			ServiceID: "test",
		},
	}

	var out struct{}
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	args.Node, args.Check.Node = "foo", "foo"
	if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	req, err := http.NewRequest("GET", "/v1/health/service/test?dc=dc1&near=foo", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := httptest.NewRecorder()
	obj, err := srv.HealthServiceNodes(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes := obj.(structs.CheckServiceNodes)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", obj)
	}
	if nodes[0].Node.Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node.Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}

	// Send an update for the node and wait for it to get applied.
	arg := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Coord:      coordinate.NewCoordinate(coordinate.DefaultConfig()),
	}
	if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	time.Sleep(200 * time.Millisecond)

	// Query again and now foo should have moved to the front of the line.
	resp = httptest.NewRecorder()
	obj, err = srv.HealthServiceNodes(resp, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	assertIndex(t, resp)
	nodes = obj.(structs.CheckServiceNodes)
	if len(nodes) != 2 {
		t.Fatalf("bad: %v", obj)
	}
	if nodes[0].Node.Node != "foo" {
		t.Fatalf("bad: %v", nodes)
	}
	if nodes[1].Node.Node != "bar" {
		t.Fatalf("bad: %v", nodes)
	}
}

func TestHealthServiceNodes_PassingFilter(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
@@ -206,6 +206,14 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
	s.mux.HandleFunc("/v1/catalog/service/", s.wrap(s.CatalogServiceNodes))
	s.mux.HandleFunc("/v1/catalog/node/", s.wrap(s.CatalogNodeServices))

	if !s.agent.config.DisableCoordinates {
		s.mux.HandleFunc("/v1/coordinate/datacenters", s.wrap(s.CoordinateDatacenters))
		s.mux.HandleFunc("/v1/coordinate/nodes", s.wrap(s.CoordinateNodes))
	} else {
		s.mux.HandleFunc("/v1/coordinate/datacenters", s.wrap(coordinateDisabled))
		s.mux.HandleFunc("/v1/coordinate/nodes", s.wrap(coordinateDisabled))
	}

	s.mux.HandleFunc("/v1/health/node/", s.wrap(s.HealthNodeChecks))
	s.mux.HandleFunc("/v1/health/checks/", s.wrap(s.HealthServiceChecks))
	s.mux.HandleFunc("/v1/health/state/", s.wrap(s.HealthChecksInState))

@@ -485,6 +493,20 @@ func (s *HTTPServer) parseToken(req *http.Request, token *string) {
	*token = s.agent.config.ACLToken
}

// parseSource is used to parse the ?near=<node> query parameter, used for
// sorting by RTT based on a source node. We set the source's DC to the target
// DC in the request, if given, or else the agent's DC.
func (s *HTTPServer) parseSource(req *http.Request, source *structs.QuerySource) {
	s.parseDC(req, &source.Datacenter)
	if node := req.URL.Query().Get("near"); node != "" {
		if node == "_agent" {
			source.Node = s.agent.config.NodeName
		} else {
			source.Node = node
		}
	}
}

// parse is a convenience method for endpoints that need
// to use both parseWait and parseDC.
func (s *HTTPServer) parse(resp http.ResponseWriter, req *http.Request, dc *string, b *structs.QueryOptions) bool {
@@ -337,6 +337,63 @@ func testPrettyPrint(pretty string, t *testing.T) {
	}
}

func TestParseSource(t *testing.T) {
	dir, srv := makeHTTPServer(t)
	defer os.RemoveAll(dir)
	defer srv.Shutdown()
	defer srv.agent.Shutdown()

	// Default is agent's DC and no node (since the user didn't care, then
	// just give them the cheapest possible query).
	req, err := http.NewRequest("GET",
		"/v1/catalog/nodes", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	source := structs.QuerySource{}
	srv.parseSource(req, &source)
	if source.Datacenter != "dc1" || source.Node != "" {
		t.Fatalf("bad: %v", source)
	}

	// Adding the source parameter should set that node.
	req, err = http.NewRequest("GET",
		"/v1/catalog/nodes?near=bob", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	source = structs.QuerySource{}
	srv.parseSource(req, &source)
	if source.Datacenter != "dc1" || source.Node != "bob" {
		t.Fatalf("bad: %v", source)
	}

	// We should follow whatever dc parameter was given so that the node is
	// looked up correctly on the receiving end.
	req, err = http.NewRequest("GET",
		"/v1/catalog/nodes?near=bob&dc=foo", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	source = structs.QuerySource{}
	srv.parseSource(req, &source)
	if source.Datacenter != "foo" || source.Node != "bob" {
		t.Fatalf("bad: %v", source)
	}

	// The magic "_agent" node name will use the agent's local node name.
	req, err = http.NewRequest("GET",
		"/v1/catalog/nodes?near=_agent", nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	source = structs.QuerySource{}
	srv.parseSource(req, &source)
	if source.Datacenter != "dc1" || source.Node != srv.agent.config.NodeName {
		t.Fatalf("bad: %v", source)
	}
}

func TestParseWait(t *testing.T) {
	resp := httptest.NewRecorder()
	var b structs.QueryOptions
@@ -387,6 +387,12 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
	}
}

+var testRegisterRules = `
+service "api" {
+	policy = "write"
+}
+`
+
func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
	conf := nextConfig()
	conf.ACLDatacenter = "dc1"

@@ -796,8 +802,35 @@ func TestAgent_nestedPauseResume(t *testing.T) {

}

-var testRegisterRules = `
-service "api" {
-	policy = "write"
-}
-`
+func TestAgent_sendCoordinate(t *testing.T) {
+	conf := nextConfig()
+	conf.SyncCoordinateRateTarget = 10.0 // updates/sec
+	conf.SyncCoordinateIntervalMin = 1 * time.Millisecond
+	conf.ConsulConfig.CoordinateUpdatePeriod = 100 * time.Millisecond
+	conf.ConsulConfig.CoordinateUpdateBatchSize = 10
+	conf.ConsulConfig.CoordinateUpdateMaxBatches = 1
+	dir, agent := makeAgent(t, conf)
+	defer os.RemoveAll(dir)
+	defer agent.Shutdown()
+
+	testutil.WaitForLeader(t, agent.RPC, "dc1")
+
+	// Wait a little while for an update.
+	time.Sleep(2 * conf.ConsulConfig.CoordinateUpdatePeriod)
+
+	// Make sure the coordinate is present.
+	req := structs.DCSpecificRequest{
+		Datacenter: agent.config.Datacenter,
+	}
+	var reply structs.IndexedCoordinates
+	if err := agent.RPC("Coordinate.ListNodes", &req, &reply); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if len(reply.Coordinates) != 1 {
+		t.Fatalf("expected a coordinate: %v", reply)
+	}
+	coord := reply.Coordinates[0]
+	if coord.Node != agent.config.NodeName || coord.Coord == nil {
+		t.Fatalf("bad: %v", coord)
+	}
+}
@@ -18,14 +18,17 @@ import (
)

const (
-	// This scale factor means we will add a minute after we
-	// cross 128 nodes, another at 256, another at 512, etc.
-	// By 8192 nodes, we will scale up by a factor of 8
+	// This scale factor means we will add a minute after we cross 128 nodes,
+	// another at 256, another at 512, etc. By 8192 nodes, we will scale up
+	// by a factor of 8.
+	//
+	// If you update this, you may need to adjust the tuning of
+	// CoordinateUpdatePeriod and CoordinateUpdateMaxBatchSize.
	aeScaleThreshold = 128
)

-// aeScale is used to scale the time interval at which anti-entropy
-// take place. It is used to prevent saturation as the cluster size grows
+// aeScale is used to scale the time interval at which anti-entropy updates take
+// place. It is used to prevent saturation as the cluster size grows.
func aeScale(interval time.Duration, n int) time.Duration {
	// Don't scale until we cross the threshold
	if n <= aeScaleThreshold {

@@ -36,6 +39,17 @@ func aeScale(interval time.Duration, n int) time.Duration {
	return time.Duration(multiplier) * interval
}

// rateScaledInterval is used to choose an interval to perform an action in order
// to target an aggregate number of actions per second across the whole cluster.
func rateScaledInterval(rate float64, min time.Duration, n int) time.Duration {
	interval := time.Duration(float64(time.Second) * float64(n) / rate)
	if interval < min {
		return min
	}

	return interval
}

// Returns a random stagger interval between 0 and the duration
func randomStagger(intv time.Duration) time.Duration {
	return time.Duration(uint64(rand.Int63()) % uint64(intv))
@@ -24,6 +24,29 @@ func TestAEScale(t *testing.T) {
	}
}

func TestRateScaledInterval(t *testing.T) {
	min := 1 * time.Second
	rate := 200.0
	if v := rateScaledInterval(rate, min, 0); v != min {
		t.Fatalf("Bad: %v", v)
	}
	if v := rateScaledInterval(rate, min, 100); v != min {
		t.Fatalf("Bad: %v", v)
	}
	if v := rateScaledInterval(rate, min, 200); v != 1*time.Second {
		t.Fatalf("Bad: %v", v)
	}
	if v := rateScaledInterval(rate, min, 1000); v != 5*time.Second {
		t.Fatalf("Bad: %v", v)
	}
	if v := rateScaledInterval(rate, min, 5000); v != 25*time.Second {
		t.Fatalf("Bad: %v", v)
	}
	if v := rateScaledInterval(rate, min, 10000); v != 50*time.Second {
		t.Fatalf("Bad: %v", v)
	}
}

func TestRandomStagger(t *testing.T) {
	intv := time.Minute
	for i := 0; i < 10; i++ {
command/rtt.go (new file, 184 lines)
@@ -0,0 +1,184 @@
package command

import (
	"flag"
	"fmt"
	"strings"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/serf/coordinate"
	"github.com/mitchellh/cli"
)

// RTTCommand is a Command implementation that allows users to query the
// estimated round trip time between nodes using network coordinates.
type RTTCommand struct {
	Ui cli.Ui
}

func (c *RTTCommand) Help() string {
	helpText := `
Usage: consul rtt [options] node1 [node2]

  Estimates the round trip time between two nodes using Consul's network
  coordinate model of the cluster.

  At least one node name is required. If the second node name isn't given, it
  is set to the agent's node name. Note that these are node names as known to
  Consul as "consul members" would show, not IP addresses.

  By default, the two nodes are assumed to be nodes in the local datacenter
  and the LAN coordinates are used. If the -wan option is given, then the WAN
  coordinates are used, and the node names must be suffixed by a period and
  the datacenter (eg. "myserver.dc1").

  It is not possible to measure between LAN coordinates and WAN coordinates
  because they are maintained by independent Serf gossip pools, so they are
  not compatible.

Options:

  -wan                       Use WAN coordinates instead of LAN coordinates.
  -http-addr=127.0.0.1:8500  HTTP address of the Consul agent.
`
	return strings.TrimSpace(helpText)
}

func (c *RTTCommand) Run(args []string) int {
	var wan bool

	cmdFlags := flag.NewFlagSet("rtt", flag.ContinueOnError)
	cmdFlags.Usage = func() { c.Ui.Output(c.Help()) }

	cmdFlags.BoolVar(&wan, "wan", false, "wan")
	httpAddr := HTTPAddrFlag(cmdFlags)
	if err := cmdFlags.Parse(args); err != nil {
		return 1
	}

	// They must provide at least one node.
	nodes := cmdFlags.Args()
	if len(nodes) < 1 || len(nodes) > 2 {
		c.Ui.Error("One or two node names must be specified")
		c.Ui.Error("")
		c.Ui.Error(c.Help())
		return 1
	}

	// Create and test the HTTP client.
	conf := api.DefaultConfig()
	conf.Address = *httpAddr
	client, err := api.NewClient(conf)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error connecting to Consul agent: %s", err))
		return 1
	}
	coordClient := client.Coordinate()

	var source string
	var coord1, coord2 *coordinate.Coordinate
	if wan {
		source = "WAN"

		// Default the second node to the agent if none was given.
		if len(nodes) < 2 {
			agent := client.Agent()
			self, err := agent.Self()
			if err != nil {
				c.Ui.Error(fmt.Sprintf("Unable to look up agent info: %s", err))
				return 1
			}

			node, dc := self["Config"]["NodeName"], self["Config"]["Datacenter"]
			nodes = append(nodes, fmt.Sprintf("%s.%s", node, dc))
		}

		// Parse the input nodes.
		parts1 := strings.Split(nodes[0], ".")
		parts2 := strings.Split(nodes[1], ".")
		if len(parts1) != 2 || len(parts2) != 2 {
			c.Ui.Error("Node names must be specified as <node name>.<datacenter> with -wan")
			return 1
		}
		node1, dc1 := parts1[0], parts1[1]
		node2, dc2 := parts2[0], parts2[1]

		// Pull all the WAN coordinates.
		dcs, err := coordClient.Datacenters()
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error getting coordinates: %s", err))
			return 1
		}

		// See if the requested nodes are in there.
		for _, dc := range dcs {
			for _, entry := range dc.Coordinates {
				if dc.Datacenter == dc1 && entry.Node == node1 {
					coord1 = entry.Coord
				}
				if dc.Datacenter == dc2 && entry.Node == node2 {
					coord2 = entry.Coord
				}

				if coord1 != nil && coord2 != nil {
					goto SHOW_RTT
				}
			}
		}
	} else {
		source = "LAN"

		// Default the second node to the agent if none was given.
		if len(nodes) < 2 {
			agent := client.Agent()
			node, err := agent.NodeName()
			if err != nil {
				c.Ui.Error(fmt.Sprintf("Unable to look up agent info: %s", err))
				return 1
			}
			nodes = append(nodes, node)
		}

		// Pull all the LAN coordinates.
		entries, _, err := coordClient.Nodes(nil)
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error getting coordinates: %s", err))
			return 1
		}

		// See if the requested nodes are in there.
		for _, entry := range entries {
			if entry.Node == nodes[0] {
				coord1 = entry.Coord
			}
			if entry.Node == nodes[1] {
				coord2 = entry.Coord
			}

			if coord1 != nil && coord2 != nil {
				goto SHOW_RTT
			}
		}
	}

	// Make sure we found both coordinates.
	if coord1 == nil {
		c.Ui.Error(fmt.Sprintf("Could not find a coordinate for node %q", nodes[0]))
		return 1
	}
	if coord2 == nil {
		c.Ui.Error(fmt.Sprintf("Could not find a coordinate for node %q", nodes[1]))
		return 1
	}

SHOW_RTT:

	// Report the round trip time.
	dist := fmt.Sprintf("%.3f ms", coord1.DistanceTo(coord2).Seconds()*1000.0)
	c.Ui.Output(fmt.Sprintf("Estimated %s <-> %s rtt: %s (using %s coordinates)", nodes[0], nodes[1], dist, source))
	return 0
}

func (c *RTTCommand) Synopsis() string {
	return "Estimates network round trip time between nodes"
}
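For reference, a hypothetical session with the new command (the node names and reported values here are made up; the output format matches the "Estimated ... rtt" line built above):

	$ consul rtt web-1 web-2
	Estimated web-1 <-> web-2 rtt: 0.610 ms (using LAN coordinates)

	$ consul rtt -wan server-1.dc1 server-2.dc2
	Estimated server-1.dc1 <-> server-2.dc2 rtt: 55.123 ms (using WAN coordinates)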
command/rtt_test.go (new file, 215 lines)
@@ -0,0 +1,215 @@
package command

import (
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/command/agent"
	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
	"github.com/mitchellh/cli"
)

func TestRTTCommand_Implements(t *testing.T) {
	var _ cli.Command = &RTTCommand{}
}

func TestRTTCommand_Run_BadArgs(t *testing.T) {
	ui := new(cli.MockUi)
	c := &RTTCommand{Ui: ui}

	if code := c.Run([]string{}); code != 1 {
		t.Fatalf("expected return code 1, got %d", code)
	}

	if code := c.Run([]string{"node1", "node2", "node3"}); code != 1 {
		t.Fatalf("expected return code 1, got %d", code)
	}

	if code := c.Run([]string{"-wan", "node1", "node2"}); code != 1 {
		t.Fatalf("expected return code 1, got %d", code)
	}

	if code := c.Run([]string{"-wan", "node1.dc1", "node2"}); code != 1 {
		t.Fatalf("expected return code 1, got %d", code)
	}

	if code := c.Run([]string{"-wan", "node1", "node2.dc1"}); code != 1 {
		t.Fatalf("expected return code 1, got %d", code)
	}
}

func TestRTTCommand_Run_LAN(t *testing.T) {
	updatePeriod := 10 * time.Millisecond
	a := testAgentWithConfig(t, func(c *agent.Config) {
		c.ConsulConfig.CoordinateUpdatePeriod = updatePeriod
	})
	defer a.Shutdown()
	waitForLeader(t, a.httpAddr)

	// Inject some known coordinates.
	c1 := coordinate.NewCoordinate(coordinate.DefaultConfig())
	c2 := c1.Clone()
	c2.Vec[0] = 0.123
	dist_str := fmt.Sprintf("%.3f ms", c1.DistanceTo(c2).Seconds()*1000.0)
	{
		req := structs.CoordinateUpdateRequest{
			Datacenter: a.config.Datacenter,
			Node:       a.config.NodeName,
			Coord:      c1,
		}
		var reply struct{}
		if err := a.agent.RPC("Coordinate.Update", &req, &reply); err != nil {
			t.Fatalf("err: %s", err)
		}
	}
	{
		req := structs.RegisterRequest{
			Datacenter: a.config.Datacenter,
			Node:       "dogs",
			Address:    "127.0.0.2",
		}
		var reply struct{}
		if err := a.agent.RPC("Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %s", err)
		}
	}
	{
		var reply struct{}
		req := structs.CoordinateUpdateRequest{
			Datacenter: a.config.Datacenter,
			Node:       "dogs",
			Coord:      c2,
		}
		if err := a.agent.RPC("Coordinate.Update", &req, &reply); err != nil {
			t.Fatalf("err: %s", err)
		}
	}

	// Wait for the updates to get flushed to the data store.
	time.Sleep(2 * updatePeriod)

	// Try two known nodes.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-http-addr=" + a.httpAddr,
			a.config.NodeName,
			"dogs",
		}
		code := c.Run(args)
		if code != 0 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}

		// Make sure the proper RTT was reported in the output.
		expected := fmt.Sprintf("rtt: %s", dist_str)
		if !strings.Contains(ui.OutputWriter.String(), expected) {
			t.Fatalf("bad: %#v", ui.OutputWriter.String())
		}
	}

	// Default to the agent's node.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-http-addr=" + a.httpAddr,
			"dogs",
		}
		code := c.Run(args)
		if code != 0 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}

		// Make sure the proper RTT was reported in the output.
		expected := fmt.Sprintf("rtt: %s", dist_str)
		if !strings.Contains(ui.OutputWriter.String(), expected) {
			t.Fatalf("bad: %#v", ui.OutputWriter.String())
		}
	}

	// Try an unknown node.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-http-addr=" + a.httpAddr,
			a.config.NodeName,
			"nope",
		}
		code := c.Run(args)
		if code != 1 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}
	}
}

func TestRTTCommand_Run_WAN(t *testing.T) {
	a := testAgent(t)
	defer a.Shutdown()
	waitForLeader(t, a.httpAddr)

	node := fmt.Sprintf("%s.%s", a.config.NodeName, a.config.Datacenter)

	// We can't easily inject WAN coordinates, so we will just query the
	// node with itself.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-wan",
			"-http-addr=" + a.httpAddr,
			node,
			node,
		}
		code := c.Run(args)
		if code != 0 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}

		// Make sure there was some kind of RTT reported in the output.
		if !strings.Contains(ui.OutputWriter.String(), "rtt: ") {
			t.Fatalf("bad: %#v", ui.OutputWriter.String())
		}
	}

	// Default to the agent's node.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-wan",
			"-http-addr=" + a.httpAddr,
			node,
		}
		code := c.Run(args)
		if code != 0 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}

		// Make sure there was some kind of RTT reported in the output.
		if !strings.Contains(ui.OutputWriter.String(), "rtt: ") {
			t.Fatalf("bad: %#v", ui.OutputWriter.String())
		}
	}

	// Try an unknown node.
	{
		ui := new(cli.MockUi)
		c := &RTTCommand{Ui: ui}
		args := []string{
			"-wan",
			"-http-addr=" + a.httpAddr,
			node,
			"dc1.nope",
		}
		code := c.Run(args)
		if code != 1 {
			t.Fatalf("bad: %d: %#v", code, ui.ErrorWriter.String())
		}
	}
}
@@ -114,6 +114,12 @@ func init() {
		}, nil
	},

	"rtt": func() (cli.Command, error) {
		return &command.RTTCommand{
			Ui: ui,
		}, nil
	},

	"version": func() (cli.Command, error) {
		ver := Version
		rel := VersionPrerelease
@@ -105,8 +105,13 @@ func (c *Catalog) ListDatacenters(args *struct{}, reply *[]string) error {
		dcs = append(dcs, dc)
	}

	// Sort the DCs
	// TODO - do we want to control the sort behavior with an argument?
	// Sort the DCs by name first, then apply a stable sort by distance.
	sort.Strings(dcs)
	if err := c.srv.sortDatacentersByDistance(dcs); err != nil {
		return err
	}

	// Return
	*reply = dcs
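Note on the new sort order: sort.Strings establishes the name order, and sortDatacentersByDistance (added in consul/rtt.go below) then applies a stable sort, so DCs with equal or unknown distances keep their alphabetical order. A standalone sketch of the name-then-stable-distance pattern (distance values are hypothetical):

package main

import (
	"fmt"
	"sort"
)

func main() {
	dcs := []string{"dc2", "acdc", "dc1"}
	// Unknown distances are modeled as equal here; the stable sort
	// then preserves the alphabetical order from sort.Strings.
	dist := map[string]float64{"dc1": 0.001, "dc2": 0.5, "acdc": 0.5}

	sort.Strings(dcs)
	sort.SliceStable(dcs, func(i, j int) bool {
		return dist[dcs[i]] < dist[dcs[j]]
	})
	fmt.Println(dcs) // [dc1 acdc dc2], matching the test expectations below
}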
@@ -132,7 +137,7 @@ func (c *Catalog) ListNodes(args *structs.DCSpecificRequest, reply *structs.IndexedNodes) error {
		}

		reply.Index, reply.Nodes = index, nodes
		return nil
		return c.srv.sortNodesByDistanceFrom(args.Source, reply.Nodes)
	})
}

@@ -189,7 +194,10 @@ func (c *Catalog) ServiceNodes(args *structs.ServiceSpecificRequest, reply *structs.IndexedServiceNodes) error {
			return err
		}
		reply.Index, reply.ServiceNodes = index, services
		return c.srv.filterACL(args.Token, reply)
		if err := c.srv.filterACL(args.Token, reply); err != nil {
			return err
		}
		return c.srv.sortNodesByDistanceFrom(args.Source, reply.ServiceNodes)
	})

	// Provide some metrics
@@ -4,7 +4,6 @@ import (
	"fmt"
	"net/rpc"
	"os"
	"sort"
	"strings"
	"testing"
	"time"
@@ -234,9 +233,7 @@ func TestCatalogListDatacenters(t *testing.T) {
		t.Fatalf("err: %v", err)
	}

	// Sort the dcs
	sort.Strings(out)

	// The DCs should come out sorted by default.
	if len(out) != 2 {
		t.Fatalf("bad: %v", out)
	}

@@ -248,6 +245,57 @@ func TestCatalogListDatacenters(t *testing.T) {
	}
}

func TestCatalogListDatacenters_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	dir2, s2 := testServerDC(t, "dc2")
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDC(t, "acdc")
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	// Try to join
	addr := fmt.Sprintf("127.0.0.1:%d",
		s1.config.SerfWANConfig.MemberlistConfig.BindPort)
	if _, err := s2.JoinWAN([]string{addr}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if _, err := s3.JoinWAN([]string{addr}); err != nil {
		t.Fatalf("err: %v", err)
	}
	testutil.WaitForLeader(t, s1.RPC, "dc1")

	var out []string
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.ListDatacenters", struct{}{}, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// It's super hard to force the Serfs into a known configuration of
	// coordinates, so the best we can do is make sure that the sorting
	// function is getting called (it's tested extensively in rtt_test.go).
	// Since this is relative to dc1, it will be listed first (proving we
	// went into the sort fn) and the other two will be sorted by name since
	// there are no known coordinates for them.
	if len(out) != 3 {
		t.Fatalf("bad: %v", out)
	}
	if out[0] != "dc1" {
		t.Fatalf("bad: %v", out)
	}
	if out[1] != "acdc" {
		t.Fatalf("bad: %v", out)
	}
	if out[2] != "dc2" {
		t.Fatalf("bad: %v", out)
	}
}

func TestCatalogListNodes(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
@@ -456,6 +504,94 @@ func TestCatalogListNodes_ConsistentRead(t *testing.T) {
	}
}

func TestCatalogListNodes_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testutil.WaitForLeader(t, s1.RPC, "dc1")
	if err := s1.fsm.State().EnsureNode(1, &structs.Node{Node: "aaa", Address: "127.0.0.1"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(2, &structs.Node{Node: "foo", Address: "127.0.0.2"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(3, &structs.Node{Node: "bar", Address: "127.0.0.3"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(4, &structs.Node{Node: "baz", Address: "127.0.0.4"}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Set all but one of the nodes to known coordinates.
	updates := structs.Coordinates{
		{"foo", generateCoordinate(2 * time.Millisecond)},
		{"bar", generateCoordinate(5 * time.Millisecond)},
		{"baz", generateCoordinate(1 * time.Millisecond)},
	}
	if err := s1.fsm.State().CoordinateBatchUpdate(5, updates); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query with no given source node, should get the natural order from
	// the index.
	args := structs.DCSpecificRequest{
		Datacenter: "dc1",
	}
	var out structs.IndexedNodes
	testutil.WaitForResult(func() (bool, error) {
		msgpackrpc.CallWithCodec(codec, "Catalog.ListNodes", &args, &out)
		return len(out.Nodes) == 5, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
	if out.Nodes[0].Node != "aaa" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[1].Node != "bar" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[2].Node != "baz" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[3].Node != "foo" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[4].Node != s1.config.NodeName {
		t.Fatalf("bad: %v", out)
	}

	// Query relative to foo, note that there's no known coordinate for the
	// default-added Serf node nor "aaa" so they will go at the end.
	args = structs.DCSpecificRequest{
		Datacenter: "dc1",
		Source:     structs.QuerySource{Datacenter: "dc1", Node: "foo"},
	}
	testutil.WaitForResult(func() (bool, error) {
		msgpackrpc.CallWithCodec(codec, "Catalog.ListNodes", &args, &out)
		return len(out.Nodes) == 5, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
	if out.Nodes[0].Node != "foo" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[1].Node != "baz" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[2].Node != "bar" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[3].Node != "aaa" {
		t.Fatalf("bad: %v", out)
	}
	if out.Nodes[4].Node != s1.config.NodeName {
		t.Fatalf("bad: %v", out)
	}
}

func BenchmarkCatalogListNodes(t *testing.B) {
	dir1, s1 := testServer(nil)
	defer os.RemoveAll(dir1)
@@ -714,6 +850,93 @@ func TestCatalogListServiceNodes(t *testing.T) {
	}
}

func TestCatalogListServiceNodes_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	args := structs.ServiceSpecificRequest{
		Datacenter:  "dc1",
		ServiceName: "db",
	}
	var out structs.IndexedServiceNodes
	err := msgpackrpc.CallWithCodec(codec, "Catalog.ServiceNodes", &args, &out)
	if err == nil || err.Error() != "No cluster leader" {
		t.Fatalf("err: %v", err)
	}

	testutil.WaitForLeader(t, s1.RPC, "dc1")

	// Add a few nodes for the associated services.
	s1.fsm.State().EnsureNode(1, &structs.Node{Node: "aaa", Address: "127.0.0.1"})
	s1.fsm.State().EnsureService(2, "aaa", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"primary"}, Address: "127.0.0.1", Port: 5000})
	s1.fsm.State().EnsureNode(3, &structs.Node{Node: "foo", Address: "127.0.0.2"})
	s1.fsm.State().EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"primary"}, Address: "127.0.0.2", Port: 5000})
	s1.fsm.State().EnsureNode(5, &structs.Node{Node: "bar", Address: "127.0.0.3"})
	s1.fsm.State().EnsureService(6, "bar", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"primary"}, Address: "127.0.0.3", Port: 5000})
	s1.fsm.State().EnsureNode(7, &structs.Node{Node: "baz", Address: "127.0.0.4"})
	s1.fsm.State().EnsureService(8, "baz", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"primary"}, Address: "127.0.0.4", Port: 5000})

	// Set all but one of the nodes to known coordinates.
	updates := structs.Coordinates{
		{"foo", generateCoordinate(2 * time.Millisecond)},
		{"bar", generateCoordinate(5 * time.Millisecond)},
		{"baz", generateCoordinate(1 * time.Millisecond)},
	}
	if err := s1.fsm.State().CoordinateBatchUpdate(9, updates); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query with no given source node, should get the natural order from
	// the index.
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.ServiceNodes", &args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(out.ServiceNodes) != 4 {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[0].Node != "aaa" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[1].Node != "bar" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[2].Node != "baz" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[3].Node != "foo" {
		t.Fatalf("bad: %v", out)
	}

	// Query relative to foo, note that there's no known coordinate for "aaa"
	// so it will go at the end.
	args = structs.ServiceSpecificRequest{
		Datacenter:  "dc1",
		ServiceName: "db",
		Source:      structs.QuerySource{Datacenter: "dc1", Node: "foo"},
	}
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.ServiceNodes", &args, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(out.ServiceNodes) != 4 {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[0].Node != "foo" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[1].Node != "baz" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[2].Node != "bar" {
		t.Fatalf("bad: %v", out)
	}
	if out.ServiceNodes[3].Node != "aaa" {
		t.Fatalf("bad: %v", out)
	}
}

func TestCatalogNodeServices(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
@@ -12,6 +12,7 @@ import (
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
	"github.com/hashicorp/serf/serf"
)

@@ -138,6 +139,7 @@ func (c *Client) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
	conf.ProtocolVersion = protocolVersionMap[c.config.ProtocolVersion]
	conf.RejoinAfterLeave = c.config.RejoinAfterLeave
	conf.Merge = &lanMergeDelegate{dc: c.config.Datacenter}
	conf.DisableCoordinates = c.config.DisableCoordinates
	if err := ensurePath(conf.SnapshotPath, false); err != nil {
		return nil, err
	}

@@ -376,3 +378,9 @@ func (c *Client) Stats() map[string]map[string]string {
	}
	return stats
}

// GetCoordinate returns the network coordinate of the current node, as
// maintained by Serf.
func (c *Client) GetCoordinate() (*coordinate.Coordinate, error) {
	return c.serf.GetCoordinate()
}
@@ -32,6 +32,7 @@ func init() {
	protocolVersionMap = map[uint8]uint8{
		1: 4,
		2: 4,
		3: 4,
	}
}

@@ -202,6 +203,24 @@ type Config struct {
	// UserEventHandler callback can be used to handle incoming
	// user events. This function should not block.
	UserEventHandler func(serf.UserEvent)

	// DisableCoordinates controls features related to network coordinates.
	DisableCoordinates bool

	// CoordinateUpdatePeriod controls how long a server batches coordinate
	// updates before applying them in a Raft transaction. A larger period
	// leads to fewer Raft transactions, but also the stored coordinates
	// being more stale.
	CoordinateUpdatePeriod time.Duration

	// CoordinateUpdateBatchSize controls the maximum number of updates a
	// server batches before applying them in a Raft transaction.
	CoordinateUpdateBatchSize int

	// CoordinateUpdateMaxBatches controls the maximum number of batches we
	// are willing to apply in one period. After this limit we will issue a
	// warning and discard the remaining updates.
	CoordinateUpdateMaxBatches int
}

// CheckVersion is used to check if the ProtocolVersion is valid

@@ -249,13 +268,21 @@ func DefaultConfig() *Config {
		SerfLANConfig:           serf.DefaultConfig(),
		SerfWANConfig:           serf.DefaultConfig(),
		ReconcileInterval:       60 * time.Second,
		ProtocolVersion:         ProtocolVersionMax,
		ProtocolVersion:         ProtocolVersion2Compatible,
		ACLTTL:                  30 * time.Second,
		ACLDefaultPolicy:        "allow",
		ACLDownPolicy:           "extend-cache",
		TombstoneTTL:            15 * time.Minute,
		TombstoneTTLGranularity: 30 * time.Second,
		SessionTTLMin:           10 * time.Second,
		DisableCoordinates:      false,

		// These are tuned to provide a total throughput of 128 updates
		// per second. If you update these, you should update the client-
		// side SyncCoordinateRateTarget parameter accordingly.
		CoordinateUpdatePeriod:     5 * time.Second,
		CoordinateUpdateBatchSize:  128,
		CoordinateUpdateMaxBatches: 5,
	}

	// Increase our reap interval to 3 days instead of 24h.
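The tuning comment above works out as follows: each period a server can apply up to CoordinateUpdateBatchSize * CoordinateUpdateMaxBatches updates, and with the defaults that is 128 * 5 = 640 updates per 5-second period, i.e. 128 updates/sec. A quick arithmetic check (standalone sketch, defaults copied from this hunk):

package main

import (
	"fmt"
	"time"
)

func main() {
	period := 5 * time.Second
	batchSize := 128
	maxBatches := 5

	perPeriod := batchSize * maxBatches
	perSecond := float64(perPeriod) / period.Seconds()
	fmt.Printf("up to %d updates per period = %.0f updates/sec\n",
		perPeriod, perSecond) // 640 per period = 128 updates/sec
}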
170
consul/coordinate_endpoint.go
Normal file

@@ -0,0 +1,170 @@
package consul

import (
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
)

// Coordinate manages queries and updates for network coordinates.
type Coordinate struct {
	// srv is a pointer back to the server.
	srv *Server

	// updates holds pending coordinate updates for the given nodes.
	updates map[string]*coordinate.Coordinate

	// updatesLock synchronizes access to the updates map.
	updatesLock sync.Mutex
}

// NewCoordinate returns a new Coordinate endpoint.
func NewCoordinate(srv *Server) *Coordinate {
	c := &Coordinate{
		srv:     srv,
		updates: make(map[string]*coordinate.Coordinate),
	}

	go c.batchUpdate()
	return c
}

// batchUpdate is a long-running routine that flushes pending coordinates to the
// Raft log in batches.
func (c *Coordinate) batchUpdate() {
	for {
		select {
		case <-time.After(c.srv.config.CoordinateUpdatePeriod):
			if err := c.batchApplyUpdates(); err != nil {
				c.srv.logger.Printf("[ERR] consul.coordinate: Batch update failed: %v", err)
			}
		case <-c.srv.shutdownCh:
			return
		}
	}
}

// batchApplyUpdates applies all pending updates to the Raft log in a series of
// batches.
func (c *Coordinate) batchApplyUpdates() error {
	// Grab the pending updates and release the lock so we can still handle
	// incoming messages.
	c.updatesLock.Lock()
	pending := c.updates
	c.updates = make(map[string]*coordinate.Coordinate)
	c.updatesLock.Unlock()

	// Enforce the rate limit.
	limit := c.srv.config.CoordinateUpdateBatchSize * c.srv.config.CoordinateUpdateMaxBatches
	size := len(pending)
	if size > limit {
		c.srv.logger.Printf("[WARN] consul.coordinate: Discarded %d coordinate updates", size-limit)
		size = limit
	}

	// Transform the map into a slice that we can feed to the Raft log in
	// batches.
	i := 0
	updates := make(structs.Coordinates, size)
	for node, coord := range pending {
		if !(i < size) {
			break
		}

		updates[i] = &structs.Coordinate{node, coord}
		i++
	}

	// Apply the updates to the Raft log in batches.
	for start := 0; start < size; start += c.srv.config.CoordinateUpdateBatchSize {
		end := start + c.srv.config.CoordinateUpdateBatchSize
		if end > size {
			end = size
		}

		slice := updates[start:end]
		if _, err := c.srv.raftApply(structs.CoordinateBatchUpdateType, slice); err != nil {
			return err
		}
	}
	return nil
}
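The slicing logic above is the usual fixed-size chunking pattern: walk the slice in batch-sized windows and clamp the final window to the slice length. A minimal standalone sketch of the same loop (the sizes and values are hypothetical):

package main

import "fmt"

func main() {
	// Mirror the batching loop from batchApplyUpdates.
	updates := []int{1, 2, 3, 4, 5, 6, 7}
	batchSize := 3
	for start := 0; start < len(updates); start += batchSize {
		end := start + batchSize
		if end > len(updates) {
			end = len(updates)
		}
		fmt.Println(updates[start:end]) // [1 2 3], [4 5 6], [7]
	}
}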

// Update inserts or updates the LAN coordinate of a node.
func (c *Coordinate) Update(args *structs.CoordinateUpdateRequest, reply *struct{}) (err error) {
	if done, err := c.srv.forward("Coordinate.Update", args, args, reply); done {
		return err
	}

	// Since this is a coordinate coming from somewhere else, we harden this
	// and check for dimensionality problems proactively.
	coord, err := c.srv.serfLAN.GetCoordinate()
	if err != nil {
		return err
	}
	if !coord.IsCompatibleWith(args.Coord) {
		return fmt.Errorf("rejected bad coordinate: %v", args.Coord)
	}

	// Add the coordinate to the map of pending updates.
	c.updatesLock.Lock()
	c.updates[args.Node] = args.Coord
	c.updatesLock.Unlock()
	return nil
}
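The dimensionality check relies on coordinate.Coordinate.IsCompatibleWith; a quick sketch of what gets rejected, mirroring the bad-coordinate case in the endpoint test further down:

package main

import (
	"fmt"

	"github.com/hashicorp/serf/coordinate"
)

func main() {
	c1 := coordinate.NewCoordinate(coordinate.DefaultConfig())
	c2 := c1.Clone()
	fmt.Println(c1.IsCompatibleWith(c2)) // true

	// Doubling the vector length changes the dimensionality, so the
	// endpoint rejects this update instead of panicking later.
	c2.Vec = make([]float64, 2*len(c2.Vec))
	fmt.Println(c1.IsCompatibleWith(c2)) // false
}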
// ListDatacenters returns the list of datacenters and their respective nodes
// and the raw coordinates of those nodes (if no coordinates are available for
// any of the nodes, the node list may be empty).
func (c *Coordinate) ListDatacenters(args *struct{}, reply *[]structs.DatacenterMap) error {
	c.srv.remoteLock.RLock()
	defer c.srv.remoteLock.RUnlock()

	// Build up a sorted list of all the DCs, since getDatacenterMaps will
	// preserve the order of this list in the output.
	dcs := make([]string, 0, len(c.srv.remoteConsuls))
	for dc := range c.srv.remoteConsuls {
		dcs = append(dcs, dc)
	}
	sort.Strings(dcs)
	maps := c.srv.getDatacenterMaps(dcs)

	// Strip the datacenter suffixes from all the node names.
	for i := range maps {
		suffix := fmt.Sprintf(".%s", maps[i].Datacenter)
		for j := range maps[i].Coordinates {
			node := maps[i].Coordinates[j].Node
			maps[i].Coordinates[j].Node = strings.TrimSuffix(node, suffix)
		}
	}

	*reply = maps
	return nil
}

// ListNodes returns the list of nodes with their raw network coordinates (if no
// coordinates are available for a node it won't appear in this list).
func (c *Coordinate) ListNodes(args *structs.DCSpecificRequest, reply *structs.IndexedCoordinates) error {
	if done, err := c.srv.forward("Coordinate.ListNodes", args, args, reply); done {
		return err
	}

	state := c.srv.fsm.State()
	return c.srv.blockingRPC(&args.QueryOptions,
		&reply.QueryMeta,
		state.GetQueryWatch("Coordinates"),
		func() error {
			index, coords, err := state.Coordinates()
			if err != nil {
				return err
			}

			reply.Index, reply.Coordinates = index, coords
			return nil
		})
}
291
consul/coordinate_endpoint_test.go
Normal file

@@ -0,0 +1,291 @@
package consul

import (
	"fmt"
	"math/rand"
	"os"
	"reflect"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/consul/testutil"
	"github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hashicorp/serf/coordinate"
)

// generateRandomCoordinate creates a random coordinate. This mucks with the
// underlying structure directly, so it's not really useful for any particular
// position in the network, but it's a good payload to send through to make
// sure things come out the other side or get stored correctly.
func generateRandomCoordinate() *coordinate.Coordinate {
	config := coordinate.DefaultConfig()
	coord := coordinate.NewCoordinate(config)
	for i := range coord.Vec {
		coord.Vec[i] = rand.NormFloat64()
	}
	coord.Error = rand.NormFloat64()
	coord.Adjustment = rand.NormFloat64()
	return coord
}

// verifyCoordinatesEqual will compare a and b and fail if they are not exactly
// equal (no floating point fuzz is considered since we are trying to make sure
// we are getting exactly the coordinates we expect, without math on them).
func verifyCoordinatesEqual(t *testing.T, a, b *coordinate.Coordinate) {
	if !reflect.DeepEqual(a, b) {
		t.Fatalf("coordinates are not equal: %v != %v", a, b)
	}
}

func TestCoordinate_Update(t *testing.T) {
	name := fmt.Sprintf("Node %d", getPort())
	dir1, config1 := testServerConfig(t, name)
	defer os.RemoveAll(dir1)

	config1.CoordinateUpdatePeriod = 500 * time.Millisecond
	config1.CoordinateUpdateBatchSize = 5
	config1.CoordinateUpdateMaxBatches = 2
	s1, err := NewServer(config1)
	if err != nil {
		t.Fatal(err)
	}
	defer s1.Shutdown()

	codec := rpcClient(t, s1)
	defer codec.Close()
	testutil.WaitForLeader(t, s1.RPC, "dc1")

	// Register some nodes.
	nodes := []string{"node1", "node2"}
	for _, node := range nodes {
		req := structs.RegisterRequest{
			Datacenter: "dc1",
			Node:       node,
			Address:    "127.0.0.1",
		}
		var reply struct{}
		if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Send an update for the first node.
	arg1 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "node1",
		Coord:      generateRandomCoordinate(),
	}
	var out struct{}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg1, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Send an update for the second node.
	arg2 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "node2",
		Coord:      generateRandomCoordinate(),
	}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg2, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Make sure the updates did not yet apply because the update period
	// hasn't expired.
	state := s1.fsm.State()
	c, err := state.CoordinateGetRaw("node1")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if c != nil {
		t.Fatalf("should be nil because the update should be batched")
	}
	c, err = state.CoordinateGetRaw("node2")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if c != nil {
		t.Fatalf("should be nil because the update should be batched")
	}

	// Send another update for the second node. It should take precedence
	// since there will be two updates in the same batch.
	arg2.Coord = generateRandomCoordinate()
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg2, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Wait a while and the updates should get picked up.
	time.Sleep(2 * s1.config.CoordinateUpdatePeriod)
	c, err = state.CoordinateGetRaw("node1")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if c == nil {
		t.Fatalf("should return a coordinate but it's nil")
	}
	verifyCoordinatesEqual(t, c, arg1.Coord)
	c, err = state.CoordinateGetRaw("node2")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if c == nil {
		t.Fatalf("should return a coordinate but it's nil")
	}
	verifyCoordinatesEqual(t, c, arg2.Coord)

	// Register a bunch of additional nodes.
	spamLen := s1.config.CoordinateUpdateBatchSize*s1.config.CoordinateUpdateMaxBatches + 1
	for i := 0; i < spamLen; i++ {
		req := structs.RegisterRequest{
			Datacenter: "dc1",
			Node:       fmt.Sprintf("bogusnode%d", i),
			Address:    "127.0.0.1",
		}
		var reply struct{}
		if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Now spam some coordinate updates and make sure it starts throwing
	// them away if they exceed the batch allowance. Note we have to make
	// unique names since these are held in a map by node name.
	for i := 0; i < spamLen; i++ {
		arg1.Node = fmt.Sprintf("bogusnode%d", i)
		arg1.Coord = generateRandomCoordinate()
		if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg1, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Wait a little while for the batch routine to run, then make sure
	// exactly one of the updates got dropped (we won't know which one).
	time.Sleep(2 * s1.config.CoordinateUpdatePeriod)
	numDropped := 0
	for i := 0; i < spamLen; i++ {
		c, err = state.CoordinateGetRaw(fmt.Sprintf("bogusnode%d", i))
		if err != nil {
			t.Fatalf("err: %v", err)
		}
		if c == nil {
			numDropped++
		}
	}
	if numDropped != 1 {
		t.Fatalf("wrong number of coordinates dropped, %d != 1", numDropped)
	}

	// Finally, send a coordinate with the wrong dimensionality to make sure
	// there are no panics, and that it gets rejected.
	arg2.Coord.Vec = make([]float64, 2*len(arg2.Coord.Vec))
	err = msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg2, &out)
	if err == nil || !strings.Contains(err.Error(), "rejected bad coordinate") {
		t.Fatalf("should have failed with an error, got %v", err)
	}
}

func TestCoordinate_ListDatacenters(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testutil.WaitForLeader(t, s1.RPC, "dc1")

	// It's super hard to force the Serfs into a known configuration of
	// coordinates, so the best we can do is make sure our own DC shows
	// up in the list with the proper coordinates. The guts of the algorithm
	// are extensively tested in rtt_test.go using a mock database.
	var out []structs.DatacenterMap
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.ListDatacenters", struct{}{}, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(out) != 1 ||
		out[0].Datacenter != "dc1" ||
		len(out[0].Coordinates) != 1 ||
		out[0].Coordinates[0].Node != s1.config.NodeName {
		t.Fatalf("bad: %v", out)
	}
	c, err := s1.serfWAN.GetCoordinate()
	if err != nil {
		t.Fatalf("bad: %v", err)
	}
	verifyCoordinatesEqual(t, c, out[0].Coordinates[0].Coord)
}

func TestCoordinate_ListNodes(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	codec := rpcClient(t, s1)
	defer codec.Close()
	testutil.WaitForLeader(t, s1.RPC, "dc1")

	// Register some nodes.
	nodes := []string{"foo", "bar", "baz"}
	for _, node := range nodes {
		req := structs.RegisterRequest{
			Datacenter: "dc1",
			Node:       node,
			Address:    "127.0.0.1",
		}
		var reply struct{}
		if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Send coordinate updates for a few nodes, waiting a little while for
	// the batch update to run.
	arg1 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Coord:      generateRandomCoordinate(),
	}
	var out struct{}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg1, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg2 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "bar",
		Coord:      generateRandomCoordinate(),
	}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg2, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg3 := structs.CoordinateUpdateRequest{
		Datacenter: "dc1",
		Node:       "baz",
		Coord:      generateRandomCoordinate(),
	}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &arg3, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
	time.Sleep(2 * s1.config.CoordinateUpdatePeriod)

	// Now query back for all the nodes.
	arg := structs.DCSpecificRequest{
		Datacenter: "dc1",
	}
	resp := structs.IndexedCoordinates{}
	if err := msgpackrpc.CallWithCodec(codec, "Coordinate.ListNodes", &arg, &resp); err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(resp.Coordinates) != 3 ||
		resp.Coordinates[0].Node != "bar" ||
		resp.Coordinates[1].Node != "baz" ||
		resp.Coordinates[2].Node != "foo" {
		t.Fatalf("bad: %v", resp.Coordinates)
	}
	verifyCoordinatesEqual(t, resp.Coordinates[0].Coord, arg2.Coord) // bar
	verifyCoordinatesEqual(t, resp.Coordinates[1].Coord, arg3.Coord) // baz
	verifyCoordinatesEqual(t, resp.Coordinates[2].Coord, arg1.Coord) // foo
}
@@ -89,6 +89,8 @@ func (c *consulFSM) Apply(log *raft.Log) interface{} {
		return c.applyACLOperation(buf[1:], log.Index)
	case structs.TombstoneRequestType:
		return c.applyTombstoneOperation(buf[1:], log.Index)
	case structs.CoordinateBatchUpdateType:
		return c.applyCoordinateBatchUpdate(buf[1:], log.Index)
	default:
		if ignoreUnknown {
			c.logger.Printf("[WARN] consul.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType)

@@ -246,6 +248,22 @@ func (c *consulFSM) applyTombstoneOperation(buf []byte, index uint64) interface{} {
	}
}

// applyCoordinateBatchUpdate processes a batch of coordinate updates and applies
// them in a single underlying transaction. This interface isn't 1:1 with the outer
// update interface that the coordinate endpoint exposes, so we made it single
// purpose and avoided the opcode convention.
func (c *consulFSM) applyCoordinateBatchUpdate(buf []byte, index uint64) interface{} {
	var updates structs.Coordinates
	if err := structs.Decode(buf, &updates); err != nil {
		panic(fmt.Errorf("failed to decode batch updates: %v", err))
	}
	defer metrics.MeasureSince([]string{"consul", "fsm", "coordinate", "batch-update"}, time.Now())
	if err := c.state.CoordinateBatchUpdate(index, updates); err != nil {
		return err
	}
	return nil
}

func (c *consulFSM) Snapshot() (raft.FSMSnapshot, error) {
	defer func(start time.Time) {
		c.logger.Printf("[INFO] consul.fsm: snapshot created in %v", time.Now().Sub(start))

@@ -343,6 +361,16 @@ func (c *consulFSM) Restore(old io.ReadCloser) error {
			return err
		}

	case structs.CoordinateBatchUpdateType:
		var req structs.Coordinates
		if err := dec.Decode(&req); err != nil {
			return err
		}
		if err := restore.Coordinates(header.LastIndex, req); err != nil {
			return err
		}

	default:
		return fmt.Errorf("Unrecognized msg type: %v", msgType)
	}

@@ -444,6 +472,21 @@ func (s *consulSnapshot) persistNodes(sink raft.SnapshotSink,
			}
		}
	}

	// Save the coordinates separately since they are not part of the
	// register request interface. To avoid copying them out, we turn
	// them into batches with a single coordinate each.
	coords, err := s.state.Coordinates()
	if err != nil {
		return err
	}
	for coord := coords.Next(); coord != nil; coord = coords.Next() {
		sink.Write([]byte{byte(structs.CoordinateBatchUpdateType)})
		updates := structs.Coordinates{coord.(*structs.Coordinate)}
		if err := encoder.Encode(&updates); err != nil {
			return err
		}
	}
	return nil
}
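A note on the snapshot framing above: persistNodes writes the message-type byte by hand and then encodes the batch, which is exactly what structs.Encode produces in one call (the FSM test below uses it), and Restore decodes everything after that byte. A hedged sketch of round-tripping one single-coordinate batch through those helpers (the helper function and node name are hypothetical, not part of this PR):

package consul

import (
	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
)

// roundTripOneCoordinate shows the framing: structs.Encode prepends the
// message-type byte, and the decoder reads everything after buf[0].
func roundTripOneCoordinate() (structs.Coordinates, error) {
	updates := structs.Coordinates{
		&structs.Coordinate{
			Node:  "node1", // hypothetical node name
			Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()),
		},
	}
	buf, err := structs.Encode(structs.CoordinateBatchUpdateType, updates)
	if err != nil {
		return nil, err
	}

	var decoded structs.Coordinates
	if err := structs.Decode(buf[1:], &decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}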
@@ -3,6 +3,7 @@ package consul

import (
	"bytes"
	"os"
	"reflect"
	"testing"

	"github.com/hashicorp/consul/consul/state"

@@ -382,6 +383,20 @@ func TestFSM_SnapshotRestore(t *testing.T) {
		t.Fatalf("bad index: %d", idx)
	}

	updates := structs.Coordinates{
		&structs.Coordinate{
			Node:  "baz",
			Coord: generateRandomCoordinate(),
		},
		&structs.Coordinate{
			Node:  "foo",
			Coord: generateRandomCoordinate(),
		},
	}
	if err := fsm.state.CoordinateBatchUpdate(13, updates); err != nil {
		t.Fatalf("err: %s", err)
	}

	// Snapshot
	snap, err := fsm.Snapshot()
	if err != nil {

@@ -490,6 +505,15 @@ func TestFSM_SnapshotRestore(t *testing.T) {
			t.Fatalf("unexpected extra tombstones")
		}
	}()

	// Verify coordinates are restored
	_, coords, err := fsm2.state.Coordinates()
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	if !reflect.DeepEqual(coords, updates) {
		t.Fatalf("bad: %#v", coords)
	}
}

func TestFSM_KVSSet(t *testing.T) {
@@ -728,6 +752,46 @@ func TestFSM_KVSCheckAndSet(t *testing.T) {
	}
}

func TestFSM_CoordinateUpdate(t *testing.T) {
	fsm, err := NewFSM(nil, os.Stderr)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Register some nodes.
	fsm.state.EnsureNode(1, &structs.Node{Node: "node1", Address: "127.0.0.1"})
	fsm.state.EnsureNode(2, &structs.Node{Node: "node2", Address: "127.0.0.1"})

	// Write a batch of two coordinates.
	updates := structs.Coordinates{
		&structs.Coordinate{
			Node:  "node1",
			Coord: generateRandomCoordinate(),
		},
		&structs.Coordinate{
			Node:  "node2",
			Coord: generateRandomCoordinate(),
		},
	}
	buf, err := structs.Encode(structs.CoordinateBatchUpdateType, updates)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	resp := fsm.Apply(makeLog(buf))
	if resp != nil {
		t.Fatalf("resp: %v", resp)
	}

	// Read back the two coordinates to make sure they got updated.
	_, coords, err := fsm.state.Coordinates()
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	if !reflect.DeepEqual(coords, updates) {
		t.Fatalf("bad: %#v", coords)
	}
}

func TestFSM_SessionCreate_Destroy(t *testing.T) {
	fsm, err := NewFSM(nil, os.Stderr)
	if err != nil {
@@ -30,7 +30,10 @@ func (h *Health) ChecksInState(args *structs.ChecksInStateRequest,
			return err
		}
		reply.Index, reply.HealthChecks = index, checks
		return h.srv.filterACL(args.Token, reply)
		if err := h.srv.filterACL(args.Token, reply); err != nil {
			return err
		}
		return h.srv.sortNodesByDistanceFrom(args.Source, reply.HealthChecks)
	})
}

@@ -82,7 +85,10 @@ func (h *Health) ServiceChecks(args *structs.ServiceSpecificRequest,
			return err
		}
		reply.Index, reply.HealthChecks = index, checks
		return h.srv.filterACL(args.Token, reply)
		if err := h.srv.filterACL(args.Token, reply); err != nil {
			return err
		}
		return h.srv.sortNodesByDistanceFrom(args.Source, reply.HealthChecks)
	})
}

@@ -115,8 +121,12 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *structs.IndexedCheckServiceNodes) error {
		if err != nil {
			return err
		}

		reply.Index, reply.Nodes = index, nodes
		return h.srv.filterACL(args.Token, reply)
		if err := h.srv.filterACL(args.Token, reply); err != nil {
			return err
		}
		return h.srv.sortNodesByDistanceFrom(args.Source, reply.Nodes)
	})

	// Provide some metrics
@@ -3,6 +3,7 @@ package consul

import (
	"os"
	"testing"
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/consul/testutil"
@@ -55,6 +56,83 @@ func TestHealth_ChecksInState(t *testing.T) {
	}
}

func TestHealth_ChecksInState_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testutil.WaitForLeader(t, s1.RPC, "dc1")
	if err := s1.fsm.State().EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.2"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.3"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	updates := structs.Coordinates{
		{"foo", generateCoordinate(1 * time.Millisecond)},
		{"bar", generateCoordinate(2 * time.Millisecond)},
	}
	if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg := structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Address:    "127.0.0.1",
		Check: &structs.HealthCheck{
			Name:   "memory utilization",
			Status: structs.HealthPassing,
		},
	}

	var out struct{}
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query relative to foo to make sure it shows up first in the list.
	var out2 structs.IndexedHealthChecks
	inState := structs.ChecksInStateRequest{
		Datacenter: "dc1",
		State:      structs.HealthPassing,
		Source: structs.QuerySource{
			Datacenter: "dc1",
			Node:       "foo",
		},
	}
	if err := msgpackrpc.CallWithCodec(codec, "Health.ChecksInState", &inState, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	checks := out2.HealthChecks
	if len(checks) != 3 {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[0].Node != "foo" {
		t.Fatalf("Bad: %v", checks[0])
	}

	// Now query relative to bar to make sure it shows up first.
	inState.Source.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Health.ChecksInState", &inState, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	checks = out2.HealthChecks
	if len(checks) != 3 {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[0].Node != "bar" {
		t.Fatalf("Bad: %v", checks[0])
	}
}

func TestHealth_NodeChecks(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
@@ -142,6 +220,94 @@ func TestHealth_ServiceChecks(t *testing.T) {
	}
}

func TestHealth_ServiceChecks_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testutil.WaitForLeader(t, s1.RPC, "dc1")
	if err := s1.fsm.State().EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.2"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.3"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	updates := structs.Coordinates{
		{"foo", generateCoordinate(1 * time.Millisecond)},
		{"bar", generateCoordinate(2 * time.Millisecond)},
	}
	if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg := structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Address:    "127.0.0.1",
		Service: &structs.NodeService{
			ID:      "db",
			Service: "db",
		},
		Check: &structs.HealthCheck{
			Name:      "db connect",
			Status:    structs.HealthPassing,
			ServiceID: "db",
		},
	}

	var out struct{}
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query relative to foo to make sure it shows up first in the list.
	var out2 structs.IndexedHealthChecks
	node := structs.ServiceSpecificRequest{
		Datacenter:  "dc1",
		ServiceName: "db",
		Source: structs.QuerySource{
			Datacenter: "dc1",
			Node:       "foo",
		},
	}
	if err := msgpackrpc.CallWithCodec(codec, "Health.ServiceChecks", &node, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	checks := out2.HealthChecks
	if len(checks) != 2 {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[0].Node != "foo" {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[1].Node != "bar" {
		t.Fatalf("Bad: %v", checks)
	}

	// Now query relative to bar to make sure it shows up first.
	node.Source.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Health.ServiceChecks", &node, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	checks = out2.HealthChecks
	if len(checks) != 2 {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[0].Node != "bar" {
		t.Fatalf("Bad: %v", checks)
	}
	if checks[1].Node != "foo" {
		t.Fatalf("Bad: %v", checks)
	}
}

func TestHealth_ServiceNodes(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
@@ -225,6 +391,94 @@ func TestHealth_ServiceNodes(t *testing.T) {
	}
}

func TestHealth_ServiceNodes_DistanceSort(t *testing.T) {
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testutil.WaitForLeader(t, s1.RPC, "dc1")
	if err := s1.fsm.State().EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.2"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	if err := s1.fsm.State().EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.3"}); err != nil {
		t.Fatalf("err: %v", err)
	}
	updates := structs.Coordinates{
		{"foo", generateCoordinate(1 * time.Millisecond)},
		{"bar", generateCoordinate(2 * time.Millisecond)},
	}
	if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg := structs.RegisterRequest{
		Datacenter: "dc1",
		Node:       "foo",
		Address:    "127.0.0.1",
		Service: &structs.NodeService{
			ID:      "db",
			Service: "db",
		},
		Check: &structs.HealthCheck{
			Name:      "db connect",
			Status:    structs.HealthPassing,
			ServiceID: "db",
		},
	}

	var out struct{}
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	arg.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Query relative to foo to make sure it shows up first in the list.
	var out2 structs.IndexedCheckServiceNodes
	req := structs.ServiceSpecificRequest{
		Datacenter:  "dc1",
		ServiceName: "db",
		Source: structs.QuerySource{
			Datacenter: "dc1",
			Node:       "foo",
		},
	}
	if err := msgpackrpc.CallWithCodec(codec, "Health.ServiceNodes", &req, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	nodes := out2.Nodes
	if len(nodes) != 2 {
		t.Fatalf("Bad: %v", nodes)
	}
	if nodes[0].Node.Node != "foo" {
		t.Fatalf("Bad: %v", nodes[0])
	}
	if nodes[1].Node.Node != "bar" {
		t.Fatalf("Bad: %v", nodes[1])
	}

	// Now query relative to bar to make sure it shows up first.
	req.Source.Node = "bar"
	if err := msgpackrpc.CallWithCodec(codec, "Health.ServiceNodes", &req, &out2); err != nil {
		t.Fatalf("err: %v", err)
	}
	nodes = out2.Nodes
	if len(nodes) != 2 {
		t.Fatalf("Bad: %v", nodes)
	}
	if nodes[0].Node.Node != "bar" {
		t.Fatalf("Bad: %v", nodes[0])
	}
	if nodes[1].Node.Node != "foo" {
		t.Fatalf("Bad: %v", nodes[1])
	}
}

func TestHealth_NodeChecks_FilterACL(t *testing.T) {
	dir, token, srv, codec := testACLFilterServer(t)
	defer os.RemoveAll(dir)
398
consul/rtt.go
Normal file

@@ -0,0 +1,398 @@
package consul

import (
	"fmt"
	"math"
	"sort"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/serf/coordinate"
)

// computeDistance returns the distance between the two network coordinates in
// seconds. If either of the coordinates is nil then this will return positive
// infinity.
func computeDistance(a *coordinate.Coordinate, b *coordinate.Coordinate) float64 {
	if a == nil || b == nil {
		return math.Inf(1.0)
	}

	return a.DistanceTo(b).Seconds()
}
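Returning positive infinity for unknown coordinates is what pushes coordinate-less entries to the tail once the stable sorts below run, since any finite distance compares less than +Inf. A two-line sanity check (standalone, not from this PR):

package main

import (
	"fmt"
	"math"
)

func main() {
	inf := math.Inf(1)
	// Any finite distance sorts ahead of a missing coordinate.
	fmt.Println(0.005 < inf, inf < inf) // true false
}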
// nodeSorter takes a list of nodes and a parallel vector of distances and
// implements sort.Interface, keeping both structures coherent and sorting by
// distance.
type nodeSorter struct {
	Nodes structs.Nodes
	Vec   []float64
}

// newNodeSorter returns a new sorter for the given source coordinate and set of
// nodes.
func (s *Server) newNodeSorter(c *coordinate.Coordinate, nodes structs.Nodes) (sort.Interface, error) {
	state := s.fsm.State()
	vec := make([]float64, len(nodes))
	for i, node := range nodes {
		coord, err := state.CoordinateGetRaw(node.Node)
		if err != nil {
			return nil, err
		}
		vec[i] = computeDistance(c, coord)
	}
	return &nodeSorter{nodes, vec}, nil
}

// See sort.Interface.
func (n *nodeSorter) Len() int {
	return len(n.Nodes)
}

// See sort.Interface.
func (n *nodeSorter) Swap(i, j int) {
	n.Nodes[i], n.Nodes[j] = n.Nodes[j], n.Nodes[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}

// See sort.Interface.
func (n *nodeSorter) Less(i, j int) bool {
	return n.Vec[i] < n.Vec[j]
}

// serviceNodeSorter takes a list of service nodes and a parallel vector of
// distances and implements sort.Interface, keeping both structures coherent and
// sorting by distance.
type serviceNodeSorter struct {
	Nodes structs.ServiceNodes
	Vec   []float64
}

// newServiceNodeSorter returns a new sorter for the given source coordinate and
// set of service nodes.
func (s *Server) newServiceNodeSorter(c *coordinate.Coordinate, nodes structs.ServiceNodes) (sort.Interface, error) {
	state := s.fsm.State()
	vec := make([]float64, len(nodes))
	for i, node := range nodes {
		coord, err := state.CoordinateGetRaw(node.Node)
		if err != nil {
			return nil, err
		}
		vec[i] = computeDistance(c, coord)
	}
	return &serviceNodeSorter{nodes, vec}, nil
}

// See sort.Interface.
func (n *serviceNodeSorter) Len() int {
	return len(n.Nodes)
}

// See sort.Interface.
func (n *serviceNodeSorter) Swap(i, j int) {
	n.Nodes[i], n.Nodes[j] = n.Nodes[j], n.Nodes[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}

// See sort.Interface.
func (n *serviceNodeSorter) Less(i, j int) bool {
	return n.Vec[i] < n.Vec[j]
}

// healthCheckSorter takes a list of health checks and a parallel vector of
// distances and implements sort.Interface, keeping both structures coherent and
// sorting by distance.
type healthCheckSorter struct {
	Checks structs.HealthChecks
	Vec    []float64
}

// newHealthCheckSorter returns a new sorter for the given source coordinate and
// set of health checks with nodes.
func (s *Server) newHealthCheckSorter(c *coordinate.Coordinate, checks structs.HealthChecks) (sort.Interface, error) {
	state := s.fsm.State()
	vec := make([]float64, len(checks))
	for i, check := range checks {
		coord, err := state.CoordinateGetRaw(check.Node)
		if err != nil {
			return nil, err
		}
		vec[i] = computeDistance(c, coord)
	}
	return &healthCheckSorter{checks, vec}, nil
}

// See sort.Interface.
func (n *healthCheckSorter) Len() int {
	return len(n.Checks)
}

// See sort.Interface.
func (n *healthCheckSorter) Swap(i, j int) {
	n.Checks[i], n.Checks[j] = n.Checks[j], n.Checks[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}

// See sort.Interface.
func (n *healthCheckSorter) Less(i, j int) bool {
	return n.Vec[i] < n.Vec[j]
}

// checkServiceNodeSorter takes a list of service nodes and a parallel vector of
// distances and implements sort.Interface, keeping both structures coherent and
// sorting by distance.
type checkServiceNodeSorter struct {
	Nodes structs.CheckServiceNodes
	Vec   []float64
}

// newCheckServiceNodeSorter returns a new sorter for the given source coordinate
// and set of nodes with health checks.
func (s *Server) newCheckServiceNodeSorter(c *coordinate.Coordinate, nodes structs.CheckServiceNodes) (sort.Interface, error) {
	state := s.fsm.State()
	vec := make([]float64, len(nodes))
	for i, node := range nodes {
		coord, err := state.CoordinateGetRaw(node.Node.Node)
		if err != nil {
			return nil, err
		}
		vec[i] = computeDistance(c, coord)
	}
	return &checkServiceNodeSorter{nodes, vec}, nil
}

// See sort.Interface.
func (n *checkServiceNodeSorter) Len() int {
	return len(n.Nodes)
}

// See sort.Interface.
func (n *checkServiceNodeSorter) Swap(i, j int) {
	n.Nodes[i], n.Nodes[j] = n.Nodes[j], n.Nodes[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}

// See sort.Interface.
func (n *checkServiceNodeSorter) Less(i, j int) bool {
	return n.Vec[i] < n.Vec[j]
}

// newSorterByDistanceFrom returns a sorter for the given type.
func (s *Server) newSorterByDistanceFrom(c *coordinate.Coordinate, subj interface{}) (sort.Interface, error) {
	switch v := subj.(type) {
	case structs.Nodes:
		return s.newNodeSorter(c, v)
	case structs.ServiceNodes:
		return s.newServiceNodeSorter(c, v)
	case structs.HealthChecks:
		return s.newHealthCheckSorter(c, v)
	case structs.CheckServiceNodes:
		return s.newCheckServiceNodeSorter(c, v)
	default:
		panic(fmt.Errorf("Unhandled type passed to newSorterByDistanceFrom: %#v", subj))
	}
}
// sortNodesByDistanceFrom is used to sort results from our service catalog based
|
||||
// on the round trip time from the given source node. Nodes with missing coordinates
|
||||
// will get stable sorted at the end of the list.
|
||||
//
|
||||
// If coordinates are disabled this will be a no-op.
|
||||
func (s *Server) sortNodesByDistanceFrom(source structs.QuerySource, subj interface{}) error {
|
||||
// Make it safe to call this without having to check if coordinates are
|
||||
// disabled first.
|
||||
if s.config.DisableCoordinates {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can't sort if there's no source node.
|
||||
if source.Node == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can't compare coordinates across DCs.
|
||||
if source.Datacenter != s.config.Datacenter {
|
||||
return nil
|
||||
}
|
||||
|
||||
// There won't always be a coordinate for the source node. If there's not
|
||||
// one then we can bail out because there's no meaning for the sort.
|
||||
state := s.fsm.State()
|
||||
coord, err := state.CoordinateGetRaw(source.Node)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if coord == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do the sort!
|
||||
sorter, err := s.newSorterByDistanceFrom(coord, subj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sort.Stable(sorter)
|
||||
return nil
|
||||
}
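
Two properties of this sort are worth calling out: a node with no stored coordinate gets an infinite distance (the same convention getDatacenterDistance uses below), and sort.Stable keeps the incoming order for ties. A self-contained sketch of that behavior, using illustrative names rather than the real structs:

package main

import (
	"fmt"
	"math"
	"sort"
)

// nodeSorterSketch mirrors the parallel-slice pattern used by the sorters in
// this file. A node with no known coordinate contributes +Inf, so the stable
// sort pushes it to the end while everything else orders by estimated RTT.
type nodeSorterSketch struct {
	Nodes []string
	Vec   []float64
}

func (n *nodeSorterSketch) Len() int { return len(n.Nodes) }
func (n *nodeSorterSketch) Swap(i, j int) {
	n.Nodes[i], n.Nodes[j] = n.Nodes[j], n.Nodes[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}
func (n *nodeSorterSketch) Less(i, j int) bool { return n.Vec[i] < n.Vec[j] }

func main() {
	s := &nodeSorterSketch{
		Nodes: []string{"apple", "node1", "node2"},
		Vec:   []float64{math.Inf(1), 0.010, 0.002},
	}
	sort.Stable(s)
	fmt.Println(s.Nodes) // [node2 node1 apple]
}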

// serfer provides the coordinate information we need from the Server in an
// interface that's easy to mock out for testing. Without this, we'd have to
// do some really painful setup to get good unit test coverage of all the cases.
type serfer interface {
	GetDatacenter() string
	GetCoordinate() (*coordinate.Coordinate, error)
	GetCachedCoordinate(node string) (*coordinate.Coordinate, bool)
	GetNodesForDatacenter(dc string) []string
}

// serverSerfer wraps a Server with the serfer interface.
type serverSerfer struct {
	server *Server
}

// See serfer.
func (s *serverSerfer) GetDatacenter() string {
	return s.server.config.Datacenter
}

// See serfer.
func (s *serverSerfer) GetCoordinate() (*coordinate.Coordinate, error) {
	return s.server.serfWAN.GetCoordinate()
}

// See serfer.
func (s *serverSerfer) GetCachedCoordinate(node string) (*coordinate.Coordinate, bool) {
	return s.server.serfWAN.GetCachedCoordinate(node)
}

// See serfer.
func (s *serverSerfer) GetNodesForDatacenter(dc string) []string {
	s.server.remoteLock.RLock()
	defer s.server.remoteLock.RUnlock()

	nodes := make([]string, 0)
	for _, part := range s.server.remoteConsuls[dc] {
		nodes = append(nodes, part.Name)
	}
	return nodes
}

// sortDatacentersByDistance will sort the given list of DCs based on the
// median RTT to all nodes we know about from the WAN gossip pool. DCs with
// missing coordinates will be stable sorted to the end of the list.
//
// If coordinates are disabled this will be a no-op.
func (s *Server) sortDatacentersByDistance(dcs []string) error {
	// Make it safe to call this without having to check if coordinates are
	// disabled first.
	if s.config.DisableCoordinates {
		return nil
	}

	// Do the sort!
	serfer := serverSerfer{s}
	return sortDatacentersByDistance(&serfer, dcs)
}

// getDatacenterDistance will return the median round trip time estimate for
// the given DC from the given serfer, in seconds. This will return positive
// infinity if no coordinates are available.
func getDatacenterDistance(s serfer, dc string) (float64, error) {
	// If this is the serfer's DC then just bail with zero RTT.
	if dc == s.GetDatacenter() {
		return 0.0, nil
	}

	// Otherwise measure from the serfer to the nodes in the other DC.
	coord, err := s.GetCoordinate()
	if err != nil {
		return 0.0, err
	}

	// Fetch all the nodes in the DC and record their distance, if available.
	nodes := s.GetNodesForDatacenter(dc)
	subvec := make([]float64, 0, len(nodes))
	for _, node := range nodes {
		if other, ok := s.GetCachedCoordinate(node); ok {
			subvec = append(subvec, computeDistance(coord, other))
		}
	}

	// Compute the median by sorting and taking the middle item.
	if len(subvec) > 0 {
		sort.Float64s(subvec)
		return subvec[len(subvec)/2], nil
	}

	// Return the default infinity value.
	return computeDistance(coord, nil), nil
}
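
A quick standalone check of the median arithmetic above; the numbers mirror the ones exercised in the tests later in this change:

package main

import (
	"fmt"
	"sort"
)

func main() {
	// Three known distances, in seconds; a node with no coordinate
	// contributes nothing. After sorting, the pick is index len/2 = 3/2 = 1,
	// so the median is 0.007 (with an even count this picks the upper median).
	subvec := []float64{0.008, 0.005, 0.007}
	sort.Float64s(subvec)
	fmt.Println(subvec[len(subvec)/2]) // 0.007
}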

// datacenterSorter takes a list of DC names and a parallel vector of distances
// and implements sort.Interface, keeping both structures coherent and sorting
// by distance.
type datacenterSorter struct {
	Names []string
	Vec   []float64
}

// See sort.Interface.
func (n *datacenterSorter) Len() int {
	return len(n.Names)
}

// See sort.Interface.
func (n *datacenterSorter) Swap(i, j int) {
	n.Names[i], n.Names[j] = n.Names[j], n.Names[i]
	n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i]
}

// See sort.Interface.
func (n *datacenterSorter) Less(i, j int) bool {
	return n.Vec[i] < n.Vec[j]
}

// sortDatacentersByDistance will sort the given list of DCs based on the
// median RTT to all nodes the given serfer knows about from the WAN gossip
// pool. DCs with missing coordinates will be stable sorted to the end of the
// list.
func sortDatacentersByDistance(s serfer, dcs []string) error {
	// Build up a list of median distances to the other DCs.
	vec := make([]float64, len(dcs))
	for i, dc := range dcs {
		rtt, err := getDatacenterDistance(s, dc)
		if err != nil {
			return err
		}

		vec[i] = rtt
	}

	sorter := &datacenterSorter{dcs, vec}
	sort.Stable(sorter)
	return nil
}
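
The stability guarantee is what the datacenter tests below lean on: equal (or equally unknown) distances preserve their relative order. A minimal sketch of the same effect using sort.SliceStable, with the exact values the tests assert:

package main

import (
	"fmt"
	"math"
	"sort"
)

func main() {
	// Median RTTs keyed by DC; unknown DCs get +Inf, mirroring
	// getDatacenterDistance above. The tie between acdc and dcX is
	// preserved by the stable sort.
	dcs := []string{"acdc", "dc0", "dc1", "dc2", "dcX"}
	rtt := map[string]float64{
		"acdc": math.Inf(1),
		"dc0":  0,
		"dc1":  0.007,
		"dc2":  0.002,
		"dcX":  math.Inf(1),
	}
	sort.SliceStable(dcs, func(i, j int) bool { return rtt[dcs[i]] < rtt[dcs[j]] })
	fmt.Println(dcs) // [dc0 dc2 dc1 acdc dcX]
}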

// getDatacenterMaps returns the raw coordinates of all the nodes in the
// given list of DCs (the output list will preserve the incoming order).
func (s *Server) getDatacenterMaps(dcs []string) []structs.DatacenterMap {
	serfer := serverSerfer{s}
	return getDatacenterMaps(&serfer, dcs)
}

// getDatacenterMaps returns the raw coordinates of all the nodes in the
// given list of DCs (the output list will preserve the incoming order).
func getDatacenterMaps(s serfer, dcs []string) []structs.DatacenterMap {
	maps := make([]structs.DatacenterMap, 0, len(dcs))
	for _, dc := range dcs {
		m := structs.DatacenterMap{Datacenter: dc}
		nodes := s.GetNodesForDatacenter(dc)
		for _, node := range nodes {
			if coord, ok := s.GetCachedCoordinate(node); ok {
				entry := &structs.Coordinate{Node: node, Coord: coord}
				m.Coordinates = append(m.Coordinates, entry)
			}
		}
		maps = append(maps, m)
	}
	return maps
}

580 consul/rtt_test.go Normal file
@ -0,0 +1,580 @@
package consul

import (
	"fmt"
	"math"
	"net/rpc"
	"os"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/consul/structs"
	"github.com/hashicorp/consul/testutil"
	"github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hashicorp/serf/coordinate"
)

// generateCoordinate creates a new coordinate with the given distance from the
// origin.
func generateCoordinate(rtt time.Duration) *coordinate.Coordinate {
	coord := coordinate.NewCoordinate(coordinate.DefaultConfig())
	coord.Vec[0] = rtt.Seconds()
	coord.Height = 0
	return coord
}
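
A standalone sketch of the geometry this helper sets up, assuming serf's coordinate package (which provides DistanceTo): a coordinate placed 10ms along the first axis lands roughly 10ms from a fresh origin coordinate, with the default height terms adding a few microseconds.

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/serf/coordinate"
)

func main() {
	origin := coordinate.NewCoordinate(coordinate.DefaultConfig())
	c := coordinate.NewCoordinate(coordinate.DefaultConfig())
	c.Vec[0] = (10 * time.Millisecond).Seconds()
	c.Height = 0
	// DistanceTo folds in the Euclidean distance plus the height terms.
	fmt.Println(c.DistanceTo(origin)) // ~10ms
}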

// verifyNodeSort makes sure the order of the nodes in the slice is the same as
// the expected order, expressed as a comma-separated string.
func verifyNodeSort(t *testing.T, nodes structs.Nodes, expected string) {
	vec := make([]string, len(nodes))
	for i, node := range nodes {
		vec[i] = node.Node
	}
	actual := strings.Join(vec, ",")
	if actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}
}

// verifyServiceNodeSort makes sure the order of the nodes in the slice is the
// same as the expected order, expressed as a comma-separated string.
func verifyServiceNodeSort(t *testing.T, nodes structs.ServiceNodes, expected string) {
	vec := make([]string, len(nodes))
	for i, node := range nodes {
		vec[i] = node.Node
	}
	actual := strings.Join(vec, ",")
	if actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}
}

// verifyHealthCheckSort makes sure the order of the nodes in the slice is the
// same as the expected order, expressed as a comma-separated string.
func verifyHealthCheckSort(t *testing.T, checks structs.HealthChecks, expected string) {
	vec := make([]string, len(checks))
	for i, check := range checks {
		vec[i] = check.Node
	}
	actual := strings.Join(vec, ",")
	if actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}
}

// verifyCheckServiceNodeSort makes sure the order of the nodes in the slice is
// the same as the expected order, expressed as a comma-separated string.
func verifyCheckServiceNodeSort(t *testing.T, nodes structs.CheckServiceNodes, expected string) {
	vec := make([]string, len(nodes))
	for i, node := range nodes {
		vec[i] = node.Node.Node
	}
	actual := strings.Join(vec, ",")
	if actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}
}

// seedCoordinates uses the client to set up a set of nodes with a specific
// set of distances from the origin. We also include the server so that we
// can wait for the coordinates to get committed to the Raft log.
//
// Here's the layout of the nodes:
//
//       node3 node2 node5                         node4       node1
//   |     |     |     |     |     |     |     |     |     |     |
//   0     1     2     3     4     5     6     7     8     9     10  (ms)
//
func seedCoordinates(t *testing.T, codec rpc.ClientCodec, server *Server) {
	// Register some nodes.
	for i := 0; i < 5; i++ {
		req := structs.RegisterRequest{
			Datacenter: "dc1",
			Node:       fmt.Sprintf("node%d", i+1),
			Address:    "127.0.0.1",
		}
		var reply struct{}
		if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Seed the fixed setup of the nodes.
	updates := []structs.CoordinateUpdateRequest{
		structs.CoordinateUpdateRequest{
			Datacenter: "dc1",
			Node:       "node1",
			Coord:      generateCoordinate(10 * time.Millisecond),
		},
		structs.CoordinateUpdateRequest{
			Datacenter: "dc1",
			Node:       "node2",
			Coord:      generateCoordinate(2 * time.Millisecond),
		},
		structs.CoordinateUpdateRequest{
			Datacenter: "dc1",
			Node:       "node3",
			Coord:      generateCoordinate(1 * time.Millisecond),
		},
		structs.CoordinateUpdateRequest{
			Datacenter: "dc1",
			Node:       "node4",
			Coord:      generateCoordinate(8 * time.Millisecond),
		},
		structs.CoordinateUpdateRequest{
			Datacenter: "dc1",
			Node:       "node5",
			Coord:      generateCoordinate(3 * time.Millisecond),
		},
	}

	// Apply the updates and wait a while for the batch to get committed to
	// the Raft log.
	for _, update := range updates {
		var out struct{}
		if err := msgpackrpc.CallWithCodec(codec, "Coordinate.Update", &update, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}
	time.Sleep(2 * server.config.CoordinateUpdatePeriod)
}

func TestRtt_sortNodesByDistanceFrom(t *testing.T) {
	dir, server := testServer(t)
	defer os.RemoveAll(dir)
	defer server.Shutdown()

	codec := rpcClient(t, server)
	defer codec.Close()
	testutil.WaitForLeader(t, server.RPC, "dc1")

	seedCoordinates(t, codec, server)
	nodes := structs.Nodes{
		&structs.Node{Node: "apple"},
		&structs.Node{Node: "node1"},
		&structs.Node{Node: "node2"},
		&structs.Node{Node: "node3"},
		&structs.Node{Node: "node4"},
		&structs.Node{Node: "node5"},
	}

	// The zero value for the source should not trigger any sorting.
	var source structs.QuerySource
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5")

	// Same for a source in some other DC.
	source.Node = "node1"
	source.Datacenter = "dc2"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5")

	// Same for a source node in our DC that we have no coordinate for.
	source.Node = "apple"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5")

	// Set source to legit values relative to node1 but disable coordinates.
	source.Node = "node1"
	source.Datacenter = "dc1"
	server.config.DisableCoordinates = true
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5")

	// Now enable coordinates and sort relative to node1, note that apple
	// doesn't have any seeded coordinate info so it should end up at the
	// end, despite its lexical hegemony.
	server.config.DisableCoordinates = false
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple")
}

func TestRtt_sortNodesByDistanceFrom_Nodes(t *testing.T) {
	dir, server := testServer(t)
	defer os.RemoveAll(dir)
	defer server.Shutdown()

	codec := rpcClient(t, server)
	defer codec.Close()
	testutil.WaitForLeader(t, server.RPC, "dc1")

	seedCoordinates(t, codec, server)
	nodes := structs.Nodes{
		&structs.Node{Node: "apple"},
		&structs.Node{Node: "node1"},
		&structs.Node{Node: "node2"},
		&structs.Node{Node: "node3"},
		&structs.Node{Node: "node4"},
		&structs.Node{Node: "node5"},
	}

	// Now sort relative to node1, note that apple doesn't have any
	// seeded coordinate info so it should end up at the end, despite
	// its lexical hegemony.
	var source structs.QuerySource
	source.Node = "node1"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple")

	// Try another sort from node2. Note that node5 and node3 are the
	// same distance away so the stable sort should preserve the order
	// they were in from the previous sort.
	source.Node = "node2"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "node2,node5,node3,node4,node1,apple")

	// Let's exercise the stable sort explicitly to make sure we didn't
	// just get lucky.
	nodes[1], nodes[2] = nodes[2], nodes[1]
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyNodeSort(t, nodes, "node2,node3,node5,node4,node1,apple")
}

func TestRtt_sortNodesByDistanceFrom_ServiceNodes(t *testing.T) {
	dir, server := testServer(t)
	defer os.RemoveAll(dir)
	defer server.Shutdown()

	codec := rpcClient(t, server)
	defer codec.Close()
	testutil.WaitForLeader(t, server.RPC, "dc1")

	seedCoordinates(t, codec, server)
	nodes := structs.ServiceNodes{
		&structs.ServiceNode{Node: "apple"},
		&structs.ServiceNode{Node: "node1"},
		&structs.ServiceNode{Node: "node2"},
		&structs.ServiceNode{Node: "node3"},
		&structs.ServiceNode{Node: "node4"},
		&structs.ServiceNode{Node: "node5"},
	}

	// Now sort relative to node1, note that apple doesn't have any
	// seeded coordinate info so it should end up at the end, despite
	// its lexical hegemony.
	var source structs.QuerySource
	source.Node = "node1"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyServiceNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple")

	// Try another sort from node2. Note that node5 and node3 are the
	// same distance away so the stable sort should preserve the order
	// they were in from the previous sort.
	source.Node = "node2"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyServiceNodeSort(t, nodes, "node2,node5,node3,node4,node1,apple")

	// Let's exercise the stable sort explicitly to make sure we didn't
	// just get lucky.
	nodes[1], nodes[2] = nodes[2], nodes[1]
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyServiceNodeSort(t, nodes, "node2,node3,node5,node4,node1,apple")
}

func TestRtt_sortNodesByDistanceFrom_HealthChecks(t *testing.T) {
	dir, server := testServer(t)
	defer os.RemoveAll(dir)
	defer server.Shutdown()

	codec := rpcClient(t, server)
	defer codec.Close()
	testutil.WaitForLeader(t, server.RPC, "dc1")

	seedCoordinates(t, codec, server)
	checks := structs.HealthChecks{
		&structs.HealthCheck{Node: "apple"},
		&structs.HealthCheck{Node: "node1"},
		&structs.HealthCheck{Node: "node2"},
		&structs.HealthCheck{Node: "node3"},
		&structs.HealthCheck{Node: "node4"},
		&structs.HealthCheck{Node: "node5"},
	}

	// Now sort relative to node1, note that apple doesn't have any
	// seeded coordinate info so it should end up at the end, despite
	// its lexical hegemony.
	var source structs.QuerySource
	source.Node = "node1"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, checks); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyHealthCheckSort(t, checks, "node1,node4,node5,node2,node3,apple")

	// Try another sort from node2. Note that node5 and node3 are the
	// same distance away so the stable sort should preserve the order
	// they were in from the previous sort.
	source.Node = "node2"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, checks); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyHealthCheckSort(t, checks, "node2,node5,node3,node4,node1,apple")

	// Let's exercise the stable sort explicitly to make sure we didn't
	// just get lucky.
	checks[1], checks[2] = checks[2], checks[1]
	if err := server.sortNodesByDistanceFrom(source, checks); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyHealthCheckSort(t, checks, "node2,node3,node5,node4,node1,apple")
}

func TestRtt_sortNodesByDistanceFrom_CheckServiceNodes(t *testing.T) {
	dir, server := testServer(t)
	defer os.RemoveAll(dir)
	defer server.Shutdown()

	codec := rpcClient(t, server)
	defer codec.Close()
	testutil.WaitForLeader(t, server.RPC, "dc1")

	seedCoordinates(t, codec, server)
	nodes := structs.CheckServiceNodes{
		structs.CheckServiceNode{Node: &structs.Node{Node: "apple"}},
		structs.CheckServiceNode{Node: &structs.Node{Node: "node1"}},
		structs.CheckServiceNode{Node: &structs.Node{Node: "node2"}},
		structs.CheckServiceNode{Node: &structs.Node{Node: "node3"}},
		structs.CheckServiceNode{Node: &structs.Node{Node: "node4"}},
		structs.CheckServiceNode{Node: &structs.Node{Node: "node5"}},
	}

	// Now sort relative to node1, note that apple doesn't have any
	// seeded coordinate info so it should end up at the end, despite
	// its lexical hegemony.
	var source structs.QuerySource
	source.Node = "node1"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyCheckServiceNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple")

	// Try another sort from node2. Note that node5 and node3 are the
	// same distance away so the stable sort should preserve the order
	// they were in from the previous sort.
	source.Node = "node2"
	source.Datacenter = "dc1"
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyCheckServiceNodeSort(t, nodes, "node2,node5,node3,node4,node1,apple")

	// Let's exercise the stable sort explicitly to make sure we didn't
	// just get lucky.
	nodes[1], nodes[2] = nodes[2], nodes[1]
	if err := server.sortNodesByDistanceFrom(source, nodes); err != nil {
		t.Fatalf("err: %v", err)
	}
	verifyCheckServiceNodeSort(t, nodes, "node2,node3,node5,node4,node1,apple")
}

// mockNodeMap is keyed by node name and the values are the coordinates of the
// node.
type mockNodeMap map[string]*coordinate.Coordinate

// mockServer is used to provide a serfer interface for unit tests. The key is
// DC, which selects a map from node name to coordinate for that node.
type mockServer map[string]mockNodeMap

// newMockServer is used to generate a serfer interface that presents a known DC
// topology for unit tests. The server is in dc0.
//
// Here's the layout of the nodes:
//
//         /---- dc1 ----\          /- dc2 -\   /- dc0 -\
//          node2 node1 node3        node1       node1
//   |     |     |     |     |     |     |     |     |     |     |
//   0     1     2     3     4     5     6     7     8     9     10  (ms)
//
// We also include a node4 in dc1 with no known coordinate, as well as a
// mysterious dcX with no nodes with known coordinates.
//
func newMockServer() *mockServer {
	s := make(mockServer)
	s["dc0"] = mockNodeMap{
		"dc0.node1": generateCoordinate(10 * time.Millisecond),
	}
	s["dc1"] = mockNodeMap{
		"dc1.node1": generateCoordinate(3 * time.Millisecond),
		"dc1.node2": generateCoordinate(2 * time.Millisecond),
		"dc1.node3": generateCoordinate(5 * time.Millisecond),
		"dc1.node4": nil, // no known coordinate
	}
	s["dc2"] = mockNodeMap{
		"dc2.node1": generateCoordinate(8 * time.Millisecond),
	}
	s["dcX"] = mockNodeMap{
		"dcX.node1": nil, // no known coordinate
	}
	return &s
}

// See serfer.
func (s *mockServer) GetDatacenter() string {
	return "dc0"
}

// See serfer.
func (s *mockServer) GetCoordinate() (*coordinate.Coordinate, error) {
	return (*s)["dc0"]["dc0.node1"], nil
}

// See serfer.
func (s *mockServer) GetCachedCoordinate(node string) (*coordinate.Coordinate, bool) {
	for _, nodes := range *s {
		for n, coord := range nodes {
			if n == node && coord != nil {
				return coord, true
			}
		}
	}
	return nil, false
}

// See serfer.
func (s *mockServer) GetNodesForDatacenter(dc string) []string {
	nodes := make([]string, 0)
	if n, ok := (*s)[dc]; ok {
		for name := range n {
			nodes = append(nodes, name)
		}
	}
	sort.Strings(nodes)
	return nodes
}

func TestRtt_getDatacenterDistance(t *testing.T) {
	s := newMockServer()

	// The serfer's own DC is always 0 ms away.
	if dist, err := getDatacenterDistance(s, "dc0"); err != nil || dist != 0.0 {
		t.Fatalf("bad: %v err: %v", dist, err)
	}

	// Check a DC with no coordinates, which should give positive infinity.
	if dist, err := getDatacenterDistance(s, "dcX"); err != nil || dist != math.Inf(1.0) {
		t.Fatalf("bad: %v err: %v", dist, err)
	}

	// Similar for a totally unknown DC.
	if dist, err := getDatacenterDistance(s, "acdc"); err != nil || dist != math.Inf(1.0) {
		t.Fatalf("bad: %v err: %v", dist, err)
	}

	// Check the trivial median case (just one node).
	if dist, err := getDatacenterDistance(s, "dc2"); err != nil || dist != 0.002 {
		t.Fatalf("bad: %v err: %v", dist, err)
	}

	// Check the more interesting median case, note that there's a mystery
	// node4 in there that should be excluded to make the distances sort
	// like this:
	//
	// [0] node3 (0.005), [1] node1 (0.007), [2] node2 (0.008)
	//
	// So the median should be at index 3 / 2 = 1 -> 0.007.
	if dist, err := getDatacenterDistance(s, "dc1"); err != nil || dist != 0.007 {
		t.Fatalf("bad: %v err: %v", dist, err)
	}
}

func TestRtt_sortDatacentersByDistance(t *testing.T) {
	s := newMockServer()

	dcs := []string{"acdc", "dc0", "dc1", "dc2", "dcX"}
	if err := sortDatacentersByDistance(s, dcs); err != nil {
		t.Fatalf("err: %v", err)
	}

	expected := "dc0,dc2,dc1,acdc,dcX"
	if actual := strings.Join(dcs, ","); actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}

	// Make sure the sort is stable and we didn't just get lucky.
	dcs = []string{"dcX", "dc0", "dc1", "dc2", "acdc"}
	if err := sortDatacentersByDistance(s, dcs); err != nil {
		t.Fatalf("err: %v", err)
	}

	expected = "dc0,dc2,dc1,dcX,acdc"
	if actual := strings.Join(dcs, ","); actual != expected {
		t.Fatalf("bad sort: %s != %s", actual, expected)
	}
}

func TestRtt_getDatacenterMaps(t *testing.T) {
	s := newMockServer()

	dcs := []string{"dc0", "acdc", "dc1", "dc2", "dcX"}
	maps := getDatacenterMaps(s, dcs)

	if len(maps) != 5 {
		t.Fatalf("bad: %v", maps)
	}

	if maps[0].Datacenter != "dc0" || len(maps[0].Coordinates) != 1 ||
		maps[0].Coordinates[0].Node != "dc0.node1" {
		t.Fatalf("bad: %v", maps[0])
	}
	verifyCoordinatesEqual(t, maps[0].Coordinates[0].Coord,
		generateCoordinate(10*time.Millisecond))

	if maps[1].Datacenter != "acdc" || len(maps[1].Coordinates) != 0 {
		t.Fatalf("bad: %v", maps[1])
	}

	if maps[2].Datacenter != "dc1" || len(maps[2].Coordinates) != 3 ||
		maps[2].Coordinates[0].Node != "dc1.node1" ||
		maps[2].Coordinates[1].Node != "dc1.node2" ||
		maps[2].Coordinates[2].Node != "dc1.node3" {
		t.Fatalf("bad: %v", maps[2])
	}
	verifyCoordinatesEqual(t, maps[2].Coordinates[0].Coord,
		generateCoordinate(3*time.Millisecond))
	verifyCoordinatesEqual(t, maps[2].Coordinates[1].Coord,
		generateCoordinate(2*time.Millisecond))
	verifyCoordinatesEqual(t, maps[2].Coordinates[2].Coord,
		generateCoordinate(5*time.Millisecond))

	if maps[3].Datacenter != "dc2" || len(maps[3].Coordinates) != 1 ||
		maps[3].Coordinates[0].Node != "dc2.node1" {
		t.Fatalf("bad: %v", maps[3])
	}
	verifyCoordinatesEqual(t, maps[3].Coordinates[0].Coord,
		generateCoordinate(8*time.Millisecond))

	if maps[4].Datacenter != "dcX" || len(maps[4].Coordinates) != 0 {
		t.Fatalf("bad: %v", maps[4])
	}
}

consul/serf.go
@ -38,11 +38,11 @@ func (s *Server) lanEventHandler() {
 		case e := <-s.eventChLAN:
 			switch e.EventType() {
 			case serf.EventMemberJoin:
-				s.nodeJoin(e.(serf.MemberEvent), false)
+				s.lanNodeJoin(e.(serf.MemberEvent))
 				s.localMemberEvent(e.(serf.MemberEvent))
 
 			case serf.EventMemberLeave, serf.EventMemberFailed:
-				s.nodeFailed(e.(serf.MemberEvent), false)
+				s.lanNodeFailed(e.(serf.MemberEvent))
 				s.localMemberEvent(e.(serf.MemberEvent))
 
 			case serf.EventMemberReap:
@ -68,9 +68,9 @@ func (s *Server) wanEventHandler() {
 		case e := <-s.eventChWAN:
 			switch e.EventType() {
 			case serf.EventMemberJoin:
-				s.nodeJoin(e.(serf.MemberEvent), true)
+				s.wanNodeJoin(e.(serf.MemberEvent))
 			case serf.EventMemberLeave, serf.EventMemberFailed:
-				s.nodeFailed(e.(serf.MemberEvent), true)
+				s.wanNodeFailed(e.(serf.MemberEvent))
 			case serf.EventMemberUpdate: // Ignore
 			case serf.EventMemberReap: // Ignore
 			case serf.EventUser:
@ -137,19 +137,40 @@ func (s *Server) localEvent(event serf.UserEvent) {
 	}
 }
 
-// nodeJoin is used to handle join events on the both serf clusters
-func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) {
+// lanNodeJoin is used to handle join events on the LAN pool.
+func (s *Server) lanNodeJoin(me serf.MemberEvent) {
 	for _, m := range me.Members {
 		ok, parts := isConsulServer(m)
 		if !ok {
-			if wan {
-				s.logger.Printf("[WARN] consul: non-server in WAN pool: %s", m.Name)
-			}
 			continue
 		}
-		s.logger.Printf("[INFO] consul: adding server %s", parts)
+		s.logger.Printf("[INFO] consul: adding LAN server %s", parts)
 
-		// Check if this server is known
+		// See if it's configured as part of our DC.
+		if parts.Datacenter == s.config.Datacenter {
+			s.localLock.Lock()
+			s.localConsuls[parts.Addr.String()] = parts
+			s.localLock.Unlock()
+		}
+
+		// If we're still expecting to bootstrap, we may need to handle this.
+		if s.config.BootstrapExpect != 0 {
+			s.maybeBootstrap()
+		}
+	}
+}
+
+// wanNodeJoin is used to handle join events on the WAN pool.
+func (s *Server) wanNodeJoin(me serf.MemberEvent) {
+	for _, m := range me.Members {
+		ok, parts := isConsulServer(m)
+		if !ok {
+			s.logger.Printf("[WARN] consul: non-server in WAN pool: %s", m.Name)
+			continue
+		}
+		s.logger.Printf("[INFO] consul: adding WAN server %s", parts)
+
+		// Search for this node in our existing remotes.
 		found := false
 		s.remoteLock.Lock()
 		existing := s.remoteConsuls[parts.Datacenter]
@ -161,23 +182,11 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) {
 			}
 		}
 
-		// Add ot the list if not known
+		// Add to the list if not known.
 		if !found {
 			s.remoteConsuls[parts.Datacenter] = append(existing, parts)
 		}
 		s.remoteLock.Unlock()
-
-		// Add to the local list as well
-		if !wan && parts.Datacenter == s.config.Datacenter {
-			s.localLock.Lock()
-			s.localConsuls[parts.Addr.String()] = parts
-			s.localLock.Unlock()
-		}
-
-		// If we still expecting to bootstrap, may need to handle this
-		if s.config.BootstrapExpect != 0 {
-			s.maybeBootstrap()
-		}
 	}
 }
 
@ -235,14 +244,29 @@ func (s *Server) maybeBootstrap() {
 	s.config.BootstrapExpect = 0
 }
 
-// nodeFailed is used to handle fail events on both the serf clusters
-func (s *Server) nodeFailed(me serf.MemberEvent, wan bool) {
+// lanNodeFailed is used to handle fail events on the LAN pool.
+func (s *Server) lanNodeFailed(me serf.MemberEvent) {
 	for _, m := range me.Members {
 		ok, parts := isConsulServer(m)
 		if !ok {
 			continue
 		}
-		s.logger.Printf("[INFO] consul: removing server %s", parts)
+		s.logger.Printf("[INFO] consul: removing LAN server %s", parts)
+
+		s.localLock.Lock()
+		delete(s.localConsuls, parts.Addr.String())
+		s.localLock.Unlock()
+	}
+}
+
+// wanNodeFailed is used to handle fail events on the WAN pool.
+func (s *Server) wanNodeFailed(me serf.MemberEvent) {
+	for _, m := range me.Members {
+		ok, parts := isConsulServer(m)
+		if !ok {
+			continue
+		}
+		s.logger.Printf("[INFO] consul: removing WAN server %s", parts)
 
 		// Remove the server if known
 		s.remoteLock.Lock()
@ -264,12 +288,5 @@ func (s *Server) nodeFailed(me serf.MemberEvent, wan bool) {
 			s.remoteConsuls[parts.Datacenter] = existing
 		}
 		s.remoteLock.Unlock()
-
-		// Remove from the local list as well
-		if !wan {
-			s.localLock.Lock()
-			delete(s.localConsuls, parts.Addr.String())
-			s.localLock.Unlock()
-		}
 	}
 }

consul/server.go
@ -19,6 +19,7 @@ import (
 	"github.com/hashicorp/consul/tlsutil"
 	"github.com/hashicorp/raft"
 	"github.com/hashicorp/raft-boltdb"
+	"github.com/hashicorp/serf/coordinate"
 	"github.com/hashicorp/serf/serf"
 )
 
@ -27,7 +28,15 @@
 // protocol versions.
 const (
 	ProtocolVersionMin uint8 = 1
-	ProtocolVersionMax = 2
+
+	// Version 3 added support for network coordinates but we kept the
+	// default protocol version at 2 to ease the transition to this new
+	// feature. A Consul agent speaking version 2 of the protocol will
+	// attempt to send its coordinates to a server who understands version
+	// 3 or greater.
+	ProtocolVersion2Compatible = 2
+
+	ProtocolVersionMax = 3
 )
 
 const (
@ -151,6 +160,7 @@ type endpoints struct {
 	Session    *Session
 	Internal   *Internal
 	ACL        *ACL
+	Coordinate *Coordinate
 }
 
 // NewServer is used to construct a new Consul server from the
@ -306,6 +316,10 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w
 	if err := ensurePath(conf.SnapshotPath, false); err != nil {
 		return nil, err
 	}
 
+	// Plumb down the enable coordinates flag.
+	conf.DisableCoordinates = s.config.DisableCoordinates
+
 	return serf.Create(conf)
 }
 
@ -396,6 +410,7 @@ func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
 	s.endpoints.Session = &Session{s}
 	s.endpoints.Internal = &Internal{s}
 	s.endpoints.ACL = &ACL{s}
+	s.endpoints.Coordinate = NewCoordinate(s)
 
 	// Register the handlers
 	s.rpcServer.Register(s.endpoints.Status)
@ -405,6 +420,7 @@ func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
 	s.rpcServer.Register(s.endpoints.Session)
 	s.rpcServer.Register(s.endpoints.Internal)
 	s.rpcServer.Register(s.endpoints.ACL)
+	s.rpcServer.Register(s.endpoints.Coordinate)
 
 	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
 	if err != nil {
@ -690,3 +706,13 @@ func (s *Server) Stats() map[string]map[string]string {
 	}
 	return stats
 }
+
+// GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
+func (s *Server) GetLANCoordinate() (*coordinate.Coordinate, error) {
+	return s.serfLAN.GetCoordinate()
+}
+
+// GetWANCoordinate returns the coordinate of the server in the WAN gossip pool.
+func (s *Server) GetWANCoordinate() (*coordinate.Coordinate, error) {
+	return s.serfWAN.GetCoordinate()
+}

consul/server_test.go
@ -66,6 +66,9 @@ func testServerConfig(t *testing.T, NodeName string) (string, *Config) {
 	config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
 
 	config.ReconcileInterval = 100 * time.Millisecond
+
+	config.DisableCoordinates = false
+	config.CoordinateUpdatePeriod = 100 * time.Millisecond
 	return dir, config
 }
 
consul/state/schema.go
@ -29,6 +29,7 @@ func stateStoreSchema() *memdb.DBSchema {
 		sessionsTableSchema,
 		sessionChecksTableSchema,
 		aclsTableSchema,
+		coordinatesTableSchema,
 	}
 
 	// Add the tables to the root schema
@ -345,3 +346,22 @@ func aclsTableSchema() *memdb.TableSchema {
 		},
 	}
 }
+
+// coordinatesTableSchema returns a new table schema used for storing
+// network coordinates.
+func coordinatesTableSchema() *memdb.TableSchema {
+	return &memdb.TableSchema{
+		Name: "coordinates",
+		Indexes: map[string]*memdb.IndexSchema{
+			"id": &memdb.IndexSchema{
+				Name:         "id",
+				AllowMissing: false,
+				Unique:       true,
+				Indexer: &memdb.StringFieldIndex{
+					Field:     "Node",
+					Lowercase: true,
+				},
+			},
+		},
+	}
+}
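
For reference, a self-contained sketch of how a go-memdb table with this kind of index behaves; the Coordinate struct here is a stand-in for structs.Coordinate, and the Lowercase option makes node-name lookups case-insensitive:

package main

import (
	"fmt"

	"github.com/hashicorp/go-memdb"
)

// Coordinate is a stand-in for the indexed struct; only the Node field
// matters to the "id" index.
type Coordinate struct {
	Node string
}

func main() {
	schema := &memdb.DBSchema{
		Tables: map[string]*memdb.TableSchema{
			"coordinates": &memdb.TableSchema{
				Name: "coordinates",
				Indexes: map[string]*memdb.IndexSchema{
					"id": &memdb.IndexSchema{
						Name:    "id",
						Unique:  true,
						Indexer: &memdb.StringFieldIndex{Field: "Node", Lowercase: true},
					},
				},
			},
		},
	}
	db, err := memdb.NewMemDB(schema)
	if err != nil {
		panic(err)
	}

	txn := db.Txn(true)
	if err := txn.Insert("coordinates", &Coordinate{Node: "Node1"}); err != nil {
		panic(err)
	}
	txn.Commit()

	// Because of Lowercase, the lookup key is normalized the same way the
	// indexed value was, so "node1" finds the record inserted as "Node1".
	read := db.Txn(false)
	defer read.Abort()
	raw, _ := read.First("coordinates", "id", "node1")
	fmt.Println(raw.(*Coordinate).Node) // Node1
}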

consul/state/state_store.go
@ -8,6 +8,7 @@ import (
 
 	"github.com/hashicorp/consul/consul/structs"
 	"github.com/hashicorp/go-memdb"
+	"github.com/hashicorp/serf/coordinate"
 )
 
 var (
@ -196,6 +197,15 @@ func (s *StateSnapshot) ACLs() (memdb.ResultIterator, error) {
 	return iter, nil
 }
 
+// Coordinates is used to pull all the coordinates from the snapshot.
+func (s *StateSnapshot) Coordinates() (memdb.ResultIterator, error) {
+	iter, err := s.tx.Get("coordinates", "id")
+	if err != nil {
+		return nil, err
+	}
+	return iter, nil
+}
+
 // Restore is used to efficiently manage restoring a large amount of data into
 // the state store. It works by doing all the restores inside of a single
 // transaction.
@ -299,6 +309,24 @@ func (s *StateRestore) ACL(acl *structs.ACL) error {
 	return nil
 }
 
+// Coordinates is used when restoring from a snapshot. For general inserts, use
+// CoordinateBatchUpdate. We do less vetting of the updates here because they
+// already got checked on the way in during a batch update.
+func (s *StateRestore) Coordinates(idx uint64, updates structs.Coordinates) error {
+	for _, update := range updates {
+		if err := s.tx.Insert("coordinates", update); err != nil {
+			return fmt.Errorf("failed restoring coordinate: %s", err)
+		}
+	}
+
+	if err := indexUpdateMaxTxn(s.tx, idx, "coordinates"); err != nil {
+		return fmt.Errorf("failed updating index: %s", err)
+	}
+
+	s.watches.Arm("coordinates")
+	return nil
+}
+
 // maxIndex is a helper used to retrieve the highest known index
 // amongst a set of tables in the db.
 func (s *StateStore) maxIndex(tables ...string) uint64 {
@ -379,6 +407,8 @@ func (s *StateStore) getWatchTables(method string) []string {
 		return []string{"sessions"}
 	case "ACLGet", "ACLList":
 		return []string{"acls"}
+	case "Coordinates":
+		return []string{"coordinates"}
 	}
 
 	panic(fmt.Sprintf("Unknown method %s", method))
@ -583,7 +613,6 @@ func (s *StateStore) deleteNodeTxn(tx *memdb.Txn, idx uint64, nodeID string) err
 	// Use a watch manager since the inner functions can perform multiple
 	// ops per table.
 	watches := NewDumbWatchManager(s.tableWatches)
-	watches.Arm("nodes")
 
 	// Delete all services associated with the node and update the service index.
 	services, err := tx.Get("services", "node", nodeID)
@ -620,6 +649,21 @@ func (s *StateStore) deleteNodeTxn(tx *memdb.Txn, idx uint64, nodeID string) err
 		}
 	}
 
+	// Delete any coordinate associated with this node.
+	coord, err := tx.First("coordinates", "id", nodeID)
+	if err != nil {
+		return fmt.Errorf("failed coordinate lookup: %s", err)
+	}
+	if coord != nil {
+		if err := tx.Delete("coordinates", coord); err != nil {
+			return fmt.Errorf("failed deleting coordinate: %s", err)
+		}
+		if err := tx.Insert("index", &IndexEntry{"coordinates", idx}); err != nil {
+			return fmt.Errorf("failed updating index: %s", err)
+		}
+		watches.Arm("coordinates")
+	}
+
 	// Delete the node and update the index.
 	if err := tx.Delete("nodes", node); err != nil {
 		return fmt.Errorf("failed deleting node: %s", err)
@ -645,6 +689,7 @@ func (s *StateStore) deleteNodeTxn(tx *memdb.Txn, idx uint64, nodeID string) err
 		}
 	}
 
+	watches.Arm("nodes")
 	tx.Defer(func() { watches.Notify() })
 	return nil
 }
@ -2231,3 +2276,84 @@ func (s *StateStore) aclDeleteTxn(tx *memdb.Txn, idx uint64, aclID string) error
 	tx.Defer(func() { s.tableWatches["acls"].Notify() })
 	return nil
 }
+
+// CoordinateGetRaw queries for the coordinate of the given node. This is an
+// unusual state store method because it just returns the raw coordinate or
+// nil, none of the Raft or node information is returned. This hits the 90%
+// internal-to-Consul use case for this data, and this isn't exposed via an
+// endpoint, so it doesn't matter that the Raft info isn't available.
+func (s *StateStore) CoordinateGetRaw(node string) (*coordinate.Coordinate, error) {
+	tx := s.db.Txn(false)
+	defer tx.Abort()
+
+	// Pull the full coordinate entry.
+	coord, err := tx.First("coordinates", "id", node)
+	if err != nil {
+		return nil, fmt.Errorf("failed coordinate lookup: %s", err)
+	}
+
+	// Pick out just the raw coordinate.
+	if coord != nil {
+		return coord.(*structs.Coordinate).Coord, nil
+	}
+	return nil, nil
+}
+
+// Coordinates queries for all nodes with coordinates.
+func (s *StateStore) Coordinates() (uint64, structs.Coordinates, error) {
+	tx := s.db.Txn(false)
+	defer tx.Abort()
+
+	// Get the table index.
+	idx := maxIndexTxn(tx, s.getWatchTables("Coordinates")...)
+
+	// Pull all the coordinates.
+	coords, err := tx.Get("coordinates", "id")
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed coordinate lookup: %s", err)
+	}
+	var results structs.Coordinates
+	for coord := coords.Next(); coord != nil; coord = coords.Next() {
+		results = append(results, coord.(*structs.Coordinate))
+	}
+	return idx, results, nil
+}
+
+// CoordinateBatchUpdate processes a batch of coordinate updates and applies
+// them in a single transaction.
+func (s *StateStore) CoordinateBatchUpdate(idx uint64, updates structs.Coordinates) error {
+	tx := s.db.Txn(true)
+	defer tx.Abort()
+
+	// Upsert the coordinates.
+	for _, update := range updates {
+		// Since the cleanup of coordinates is tied to deletion of
+		// nodes, we silently drop any updates for nodes that we don't
+		// know about. This might be possible during normal operation
+		// if we happen to get a coordinate update for a node that
+		// hasn't been able to add itself to the catalog yet. Since we
+		// don't carefully sequence this, and since it will fix itself
+		// on the next coordinate update from that node, we don't return
+		// an error or log anything.
+		node, err := tx.First("nodes", "id", update.Node)
+		if err != nil {
+			return fmt.Errorf("failed node lookup: %s", err)
+		}
+		if node == nil {
+			continue
+		}
+
+		if err := tx.Insert("coordinates", update); err != nil {
+			return fmt.Errorf("failed inserting coordinate: %s", err)
+		}
+	}
+
+	// Update the index.
+	if err := tx.Insert("index", &IndexEntry{"coordinates", idx}); err != nil {
+		return fmt.Errorf("failed updating index: %s", err)
+	}
+
+	tx.Defer(func() { s.tableWatches["coordinates"].Notify() })
+	tx.Commit()
+	return nil
+}

consul/state/state_store_test.go
@ -3,6 +3,7 @@ package state
 import (
 	crand "crypto/rand"
 	"fmt"
+	"math/rand"
 	"reflect"
 	"sort"
 	"strings"
@ -10,6 +11,7 @@
 	"time"
 
 	"github.com/hashicorp/consul/consul/structs"
+	"github.com/hashicorp/serf/coordinate"
 )
 
 func testUUID() string {
@ -999,20 +1001,31 @@ func TestStateStore_Node_Watches(t *testing.T) {
 		}
 	})
 
-	// Check that a delete of a node + service + check triggers all three
-	// tables in one shot.
+	// Check that a delete of a node + service + check + coordinate triggers
+	// all tables in one shot.
 	testRegisterNode(t, s, 4, "node1")
 	testRegisterService(t, s, 5, "node1", "service1")
 	testRegisterCheck(t, s, 6, "node1", "service1", "check3", structs.HealthPassing)
+	updates := structs.Coordinates{
+		&structs.Coordinate{
+			Node:  "node1",
+			Coord: generateRandomCoordinate(),
+		},
+	}
+	if err := s.CoordinateBatchUpdate(7, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
 	verifyWatch(t, s.getTableWatch("nodes"), func() {
 		verifyWatch(t, s.getTableWatch("services"), func() {
 			verifyWatch(t, s.getTableWatch("checks"), func() {
+				verifyWatch(t, s.getTableWatch("coordinates"), func() {
 					if err := s.DeleteNode(7, "node1"); err != nil {
 						t.Fatalf("err: %s", err)
 					}
+				})
 			})
 		})
 	})
 }
 
 func TestStateStore_EnsureService(t *testing.T) {
@ -4721,3 +4734,291 @@ func TestStateStore_ACL_Watches(t *testing.T) {
 	restore.Commit()
 	})
 }
+
+// generateRandomCoordinate creates a random coordinate. This mucks with the
+// underlying structure directly, so it's not really useful for any particular
+// position in the network, but it's a good payload to send through to make
+// sure things come out the other side or get stored correctly.
+func generateRandomCoordinate() *coordinate.Coordinate {
+	config := coordinate.DefaultConfig()
+	coord := coordinate.NewCoordinate(config)
+	for i := range coord.Vec {
+		coord.Vec[i] = rand.NormFloat64()
+	}
+	coord.Error = rand.NormFloat64()
+	coord.Adjustment = rand.NormFloat64()
+	return coord
+}
+
+func TestStateStore_Coordinate_Updates(t *testing.T) {
+	s := testStateStore(t)
+
+	// Make sure the coordinates list starts out empty, and that a query for
+	// a raw coordinate for a nonexistent node doesn't do anything bad.
+	idx, coords, err := s.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if idx != 0 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	if coords != nil {
+		t.Fatalf("bad: %#v", coords)
+	}
+	coord, err := s.CoordinateGetRaw("nope")
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if coord != nil {
+		t.Fatalf("bad: %#v", coord)
+	}
+
+	// Make an update for nodes that don't exist and make sure they get
+	// ignored.
+	updates := structs.Coordinates{
+		&structs.Coordinate{
+			Node:  "node1",
+			Coord: generateRandomCoordinate(),
+		},
+		&structs.Coordinate{
+			Node:  "node2",
+			Coord: generateRandomCoordinate(),
+		},
+	}
+	if err := s.CoordinateBatchUpdate(1, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Should still be empty, though applying an empty batch does bump
+	// the table index.
+	idx, coords, err = s.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if idx != 1 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	if coords != nil {
+		t.Fatalf("bad: %#v", coords)
+	}
+
+	// Register the nodes then do the update again.
+	testRegisterNode(t, s, 1, "node1")
+	testRegisterNode(t, s, 2, "node2")
+	if err := s.CoordinateBatchUpdate(3, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Should go through now.
+	idx, coords, err = s.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if idx != 3 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	if !reflect.DeepEqual(coords, updates) {
+		t.Fatalf("bad: %#v", coords)
+	}
+
+	// Also verify the raw coordinate interface.
+	for _, update := range updates {
+		coord, err := s.CoordinateGetRaw(update.Node)
+		if err != nil {
+			t.Fatalf("err: %s", err)
+		}
+		if !reflect.DeepEqual(coord, update.Coord) {
+			t.Fatalf("bad: %#v", coord)
+		}
+	}
+
+	// Update the coordinate for one of the nodes.
+	updates[1].Coord = generateRandomCoordinate()
+	if err := s.CoordinateBatchUpdate(4, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Verify it got applied.
+	idx, coords, err = s.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if idx != 4 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	if !reflect.DeepEqual(coords, updates) {
+		t.Fatalf("bad: %#v", coords)
+	}
+
+	// And check the raw coordinate version of the same thing.
+	for _, update := range updates {
+		coord, err := s.CoordinateGetRaw(update.Node)
+		if err != nil {
+			t.Fatalf("err: %s", err)
+		}
+		if !reflect.DeepEqual(coord, update.Coord) {
+			t.Fatalf("bad: %#v", coord)
+		}
+	}
+}
+
+func TestStateStore_Coordinate_Cleanup(t *testing.T) {
+	s := testStateStore(t)
+
+	// Register a node and update its coordinate.
+	testRegisterNode(t, s, 1, "node1")
+	updates := structs.Coordinates{
+		&structs.Coordinate{
+			Node:  "node1",
+			Coord: generateRandomCoordinate(),
+		},
+	}
+	if err := s.CoordinateBatchUpdate(2, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Make sure it's in there.
+	coord, err := s.CoordinateGetRaw("node1")
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if !reflect.DeepEqual(coord, updates[0].Coord) {
+		t.Fatalf("bad: %#v", coord)
+	}
+
+	// Now delete the node.
+	if err := s.DeleteNode(3, "node1"); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Make sure the coordinate is gone.
+	coord, err = s.CoordinateGetRaw("node1")
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if coord != nil {
+		t.Fatalf("bad: %#v", coord)
+	}
+
+	// Make sure the index got updated.
+	idx, coords, err := s.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if idx != 3 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	if coords != nil {
+		t.Fatalf("bad: %#v", coords)
+	}
+}
+
+func TestStateStore_Coordinate_Snapshot_Restore(t *testing.T) {
+	s := testStateStore(t)
+
+	// Register two nodes and update their coordinates.
+	testRegisterNode(t, s, 1, "node1")
+	testRegisterNode(t, s, 2, "node2")
+	updates := structs.Coordinates{
+		&structs.Coordinate{
+			Node:  "node1",
+			Coord: generateRandomCoordinate(),
+		},
+		&structs.Coordinate{
+			Node:  "node2",
+			Coord: generateRandomCoordinate(),
+		},
+	}
+	if err := s.CoordinateBatchUpdate(3, updates); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Snapshot the coordinates.
+	snap := s.Snapshot()
+	defer snap.Close()
+
+	// Alter the real state store.
+	trash := structs.Coordinates{
+		&structs.Coordinate{
+			Node:  "node1",
+			Coord: generateRandomCoordinate(),
+		},
+		&structs.Coordinate{
+			Node:  "node2",
+			Coord: generateRandomCoordinate(),
+		},
+	}
+	if err := s.CoordinateBatchUpdate(4, trash); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Verify the snapshot.
+	if idx := snap.LastIndex(); idx != 3 {
+		t.Fatalf("bad index: %d", idx)
+	}
+	iter, err := snap.Coordinates()
+	if err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	var dump structs.Coordinates
+	for coord := iter.Next(); coord != nil; coord = iter.Next() {
+		dump = append(dump, coord.(*structs.Coordinate))
+	}
+	if !reflect.DeepEqual(dump, updates) {
+		t.Fatalf("bad: %#v", dump)
+	}
+
+	// Restore the values into a new state store.
+	func() {
+		s := testStateStore(t)
+		restore := s.Restore()
+		if err := restore.Coordinates(5, dump); err != nil {
+			t.Fatalf("err: %s", err)
+		}
+		restore.Commit()
+
+		// Read the restored coordinates back out and verify that they match.
+		idx, res, err := s.Coordinates()
+		if err != nil {
+			t.Fatalf("err: %s", err)
+		}
+		if idx != 5 {
+			t.Fatalf("bad index: %d", idx)
+		}
+		if !reflect.DeepEqual(res, updates) {
+			t.Fatalf("bad: %#v", res)
+		}
+
+		// Check that the index was updated (note that it got passed
+		// in during the restore).
+		if idx := s.maxIndex("coordinates"); idx != 5 {
+			t.Fatalf("bad index: %d", idx)
+		}
+	}()
+}
+
+func TestStateStore_Coordinate_Watches(t *testing.T) {
+	s := testStateStore(t)
+
+	testRegisterNode(t, s, 1, "node1")
+
+	// Call functions that update the coordinates table and make sure a watch
+	// fires each time.
+	verifyWatch(t, s.getTableWatch("coordinates"), func() {
+		updates := structs.Coordinates{
+			&structs.Coordinate{
+				Node:  "node1",
+				Coord: generateRandomCoordinate(),
+			},
+		}
+		if err := s.CoordinateBatchUpdate(2, updates); err != nil {
+			t.Fatalf("err: %s", err)
+		}
+	})
+	verifyWatch(t, s.getTableWatch("coordinates"), func() {
+		if err := s.DeleteNode(3, "node1"); err != nil {
+			t.Fatalf("err: %s", err)
+		}
+	})
+}

consul/structs/structs.go
@ -7,6 +7,7 @@ import (
 
 	"github.com/hashicorp/consul/acl"
 	"github.com/hashicorp/go-msgpack/codec"
+	"github.com/hashicorp/serf/coordinate"
 )
 
 var (
@ -31,6 +32,7 @@
 	SessionRequestType
 	ACLRequestType
 	TombstoneRequestType
+	CoordinateBatchUpdateType
 )
 
 const (
@ -182,9 +184,18 @@ func (r *DeregisterRequest) RequestDatacenter() string {
 	return r.Datacenter
 }
 
+// QuerySource is used to pass along information about the source node
+// in queries so that we can adjust the response based on its network
+// coordinates.
+type QuerySource struct {
+	Datacenter string
+	Node       string
+}
+
 // DCSpecificRequest is used to query about a specific DC
 type DCSpecificRequest struct {
 	Datacenter string
+	Source     QuerySource
 	QueryOptions
 }
 
@ -192,12 +203,13 @@ func (r *DCSpecificRequest) RequestDatacenter() string {
 	return r.Datacenter
 }
 
-// ServiceSpecificRequest is used to query about a specific node
+// ServiceSpecificRequest is used to query about a specific service
 type ServiceSpecificRequest struct {
 	Datacenter  string
 	ServiceName string
 	ServiceTag  string
 	TagFilter   bool // Controls tag filtering
+	Source      QuerySource
 	QueryOptions
 }
 
@ -220,6 +232,7 @@ func (r *NodeSpecificRequest) RequestDatacenter() string {
 type ChecksInStateRequest struct {
 	Datacenter string
 	State      string
+	Source     QuerySource
 	QueryOptions
 }
 
@ -344,7 +357,7 @@ type HealthCheck struct {
 type HealthChecks []*HealthCheck
 
// CheckServiceNode is used to provide the node, its service
|
||||
// definition, as well as a HealthCheck that is associated
|
||||
// definition, as well as a HealthCheck that is associated.
|
||||
type CheckServiceNode struct {
|
||||
Node *Node
|
||||
Service *NodeService
|
||||
|
@ -618,6 +631,49 @@ type ACLPolicy struct {
|
|||
QueryMeta
|
||||
}
|
||||
|
||||
// Coordinate stores a node name with its associated network coordinate.
|
||||
type Coordinate struct {
|
||||
Node string
|
||||
Coord *coordinate.Coordinate
|
||||
}
|
||||
|
||||
type Coordinates []*Coordinate
|
||||
|
||||
// IndexedCoordinate is used to represent a single node's coordinate from the state
|
||||
// store.
|
||||
type IndexedCoordinate struct {
|
||||
Coord *coordinate.Coordinate
|
||||
QueryMeta
|
||||
}
|
||||
|
||||
// IndexedCoordinates is used to represent a list of nodes and their
|
||||
// corresponding raw coordinates.
|
||||
type IndexedCoordinates struct {
|
||||
Coordinates Coordinates
|
||||
QueryMeta
|
||||
}
|
||||
|
||||
// DatacenterMap is used to represent a list of nodes with their raw coordinates,
|
||||
// associated with a datacenter.
|
||||
type DatacenterMap struct {
|
||||
Datacenter string
|
||||
Coordinates Coordinates
|
||||
}
|
||||
|
||||
// CoordinateUpdateRequest is used to update the network coordinate of a given
|
||||
// node.
|
||||
type CoordinateUpdateRequest struct {
|
||||
Datacenter string
|
||||
Node string
|
||||
Coord *coordinate.Coordinate
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given update request.
|
||||
func (c *CoordinateUpdateRequest) RequestDatacenter() string {
|
||||
return c.Datacenter
|
||||
}
|
||||
|
||||
// EventFireRequest is used to ask a server to fire
|
||||
// a Serf event. It is a bit odd, since it doesn't depend on
|
||||
// the catalog or leader. Any node can respond, so it's not quite
|
||||
|
|
|
@@ -141,7 +141,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) {
	return true, parts
}

// Returns if a member is a consul node. Returns a boo,
// Returns if a member is a consul node. Returns a bool,
// and the datacenter.
func isConsulNode(m serf.Member) (bool, string) {
	if m.Tags["role"] != "node" {

@@ -1,5 +1,5 @@
source "https://rubygems.org"

ruby "2.2.2"
ruby "2.2.3"

gem "middleman-hashicorp", github: "hashicorp/middleman-hashicorp"

@@ -186,6 +186,3 @@ PLATFORMS

DEPENDENCIES
  middleman-hashicorp!

BUNDLED WITH
   1.10.6

@@ -14,17 +14,18 @@ to enable changes without breaking backwards compatibility.

Each endpoint manages a different aspect of Consul:

* [kv](http/kv.html) - Key/Value store
* [acl](http/acl.html) - Access Control Lists
* [agent](http/agent.html) - Consul Agent
* [catalog](http/catalog.html) - Nodes and services
* [health](http/health.html) - Health checks
* [session](http/session.html) - Sessions
* [acl](http/acl.html) - Access Control Lists
* [coordinate](http/coordinate.html) - Network coordinates
* [event](http/event.html) - User Events
* [health](http/health.html) - Health checks
* [kv](http/kv.html) - Key/Value store
* [session](http/session.html) - Sessions
* [status](http/status.html) - Consul system status
* internal - Internal APIs. Purposely undocumented, subject to change.

Each of these is documented in detail at the links above.
Each of these is documented in detail at the links above. Consul also has a number
of internal APIs which are purposely undocumented and subject to change.

## Blocking Queries

@@ -163,6 +163,11 @@ It returns a JSON body like this:
    "EnableSyslog": false,
    "RejoinAfterLeave": false
  },
  "Coord": {
    "Adjustment": 0,
    "Error": 1.5,
    "Vec": [0,0,0,0,0,0,0,0]
  },
  "Member": {
    "Name": "foobar",
    "Addr": "10.1.10.12",

@@ -159,6 +159,10 @@ If the API call succeeds a 200 status code is returned.
This endpoint is hit with a GET and is used to return all the
datacenters that are known by the Consul server.

The datacenters will be sorted in ascending order based on the
estimated median round trip time from the server to the servers
in that datacenter.
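
As a sketch of how a client might retrieve this sorted list through the Go
`api` package (the client construction here is an illustrative assumption,
not part of the endpoint's contract):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	// The returned datacenters are sorted by estimated median round
	// trip time from the server that answers the query.
	dcs, err := client.Catalog().Datacenters()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(dcs)
}
```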

It returns a JSON body like this:

```javascript
@@ -175,6 +179,11 @@ This endpoint is hit with a GET and returns the nodes registered
in a given DC. By default, the datacenter of the agent is queried;
however, the dc can be provided using the "?dc=" query parameter.

Adding the optional "?near=" parameter with a node name will sort
the node list in ascending order based on the estimated round trip
time from that node. Passing "?near=_agent" will use the agent's
node for the sort.
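
The same sort is available through the Go `api` package via the new `Near`
query option; a minimal sketch (the node name "node1" is a placeholder):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	// Near is serialized as the "?near=" query parameter, so the
	// returned nodes are sorted by estimated RTT from node1.
	nodes, _, err := client.Catalog().Nodes(&api.QueryOptions{Near: "node1"})
	if err != nil {
		log.Fatal(err)
	}
	for _, n := range nodes {
		fmt.Println(n.Node, n.Address)
	}
}
```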

It returns a JSON body like this:

```javascript
@@ -226,6 +235,11 @@ The service being queried must be provided on the path. By default
all nodes in that service are returned. However, the list can be filtered
by tag using the "?tag=" query parameter.

Adding the optional "?near=" parameter with a node name will sort
the node list in ascending order based on the estimated round trip
time from that node. Passing "?near=_agent" will use the agent's
node for the sort.

It returns a JSON body like this:

```javascript

78
website/source/docs/agent/http/coordinate.html.markdown
Normal file

@@ -0,0 +1,78 @@
---
layout: "docs"
page_title: "Coordinate (HTTP)"
sidebar_current: "docs-agent-http-coordinate"
description: >
  The Coordinate endpoint is used to query for the network coordinates for
  nodes in the local datacenter as well as Consul servers in the local
  datacenter and remote datacenters.
---

# Coordinate HTTP Endpoint

The Coordinate endpoint is used to query for the network coordinates for nodes
in the local datacenter as well as Consul servers in the local datacenter and
remote datacenters.

See the [Network Coordinates](/docs/internals/coordinates.html) internals guide
for more information on how these coordinates are computed, and for details on
how to perform calculations with them.

The following endpoints are supported:

* [`/v1/coordinate/datacenters`](#coordinate_datacenters) : Queries for WAN coordinates of Consul servers
* [`/v1/coordinate/nodes`](#coordinate_nodes) : Queries for LAN coordinates of Consul nodes

### <a name="coordinate_datacenters"></a> /v1/coordinate/datacenters

This endpoint is hit with a GET and returns the WAN network coordinates for
all Consul servers, organized by DCs.

It returns a JSON body like this:

```javascript
[
  {
    "Datacenter": "dc1",
    "Coordinates": [
      {
        "Node": "agent-one",
        "Coord": {
          "Adjustment": 0,
          "Error": 1.5,
          "Height": 0,
          "Vec": [0,0,0,0,0,0,0,0]
        }
      }
    ]
  }
]
```

This endpoint serves data out of the server's local Serf data about the WAN, so
its results may vary as requests are handled by different servers in the
cluster. Also, it does not support blocking queries or any consistency modes.
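
As a rough sketch of how this endpoint might be consumed from Go via the
`api` package added in this change (the client setup and error handling here
are illustrative assumptions):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	// Connect to the local agent's HTTP API (127.0.0.1:8500 by default).
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Fetch the WAN coordinates of all Consul servers, grouped by datacenter.
	dcs, err := client.Coordinate().Datacenters()
	if err != nil {
		log.Fatal(err)
	}
	for _, dc := range dcs {
		for _, entry := range dc.Coordinates {
			fmt.Printf("%s/%s: %v\n", dc.Datacenter, entry.Node, entry.Coord.Vec)
		}
	}
}
```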

### <a name="coordinate_nodes"></a> /v1/coordinate/nodes

This endpoint is hit with a GET and returns the LAN network coordinates for
all nodes in a given DC. By default, the datacenter of the agent is queried;
however, the dc can be provided using the "?dc=" query parameter.

It returns a JSON body like this:

```javascript
[
  {
    "Node": "agent-one",
    "Coord": {
      "Adjustment": 0,
      "Error": 1.5,
      "Height": 0,
      "Vec": [0,0,0,0,0,0,0,0]
    }
  }
]
```

This endpoint supports blocking queries and all consistency modes.
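
Since this endpoint supports blocking queries, a client can watch for
coordinate updates. A minimal sketch in Go, assuming the `api` package from
this change (the loop structure and callback are illustrative only):

```go
package example

import "github.com/hashicorp/consul/api"

// watchCoordinates polls /v1/coordinate/nodes as a blocking query and
// invokes fn whenever the reported LAN coordinates change.
func watchCoordinates(client *api.Client, fn func([]*api.CoordinateEntry)) error {
	var lastIndex uint64
	for {
		// Setting WaitIndex makes this a blocking query: the call
		// returns when the index advances or a server timeout fires.
		entries, meta, err := client.Coordinate().Nodes(&api.QueryOptions{
			WaitIndex: lastIndex,
		})
		if err != nil {
			return err
		}
		if meta.LastIndex != lastIndex {
			lastIndex = meta.LastIndex
			fn(entries)
		}
	}
}
```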

@@ -70,6 +70,11 @@ This endpoint is hit with a GET and returns the checks associated with
the service provided on the path. By default, the datacenter of the agent is queried;
however, the dc can be provided using the "?dc=" query parameter.

Adding the optional "?near=" parameter with a node name will sort
the node list in ascending order based on the estimated round trip
time from that node. Passing "?near=_agent" will use the agent's
node for the sort.

It returns a JSON body like this:

```javascript
@@ -95,6 +100,11 @@ This endpoint is hit with a GET and returns the nodes providing
the service indicated on the path. By default, the datacenter of the agent is queried;
however, the dc can be provided using the "?dc=" query parameter.

Adding the optional "?near=" parameter with a node name will sort
the node list in ascending order based on the estimated round trip
time from that node. Passing "?near=_agent" will use the agent's
node for the sort.

By default, all nodes matching the service are returned. The list can be filtered
by tag using the "?tag=" query parameter.
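
As a sketch combining both parameters through the Go `api` package (the
service name "redis" and tag "primary" are placeholders, and the client setup
is an illustrative assumption):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	// Filter by tag and sort the results by estimated RTT from the
	// agent's own node using the special "_agent" name.
	entries, _, err := client.Health().Service("redis", "primary", false,
		&api.QueryOptions{Near: "_agent"})
	if err != nil {
		log.Fatal(err)
	}
	for _, e := range entries {
		fmt.Println(e.Node.Node, e.Service.ID)
	}
}
```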

@@ -159,6 +169,11 @@ This endpoint is hit with a GET and returns the checks in the
state provided on the path. By default, the datacenter of the agent is queried;
however, the dc can be provided using the "?dc=" query parameter.

Adding the optional "?near=" parameter with a node name will sort
the node list in ascending order based on the estimated round trip
time from that node. Passing "?near=_agent" will use the agent's
node for the sort.

The supported states are `any`, `unknown`, `passing`, `warning`, or `critical`.
The `any` state is a wildcard that can be used to return all checks.

@@ -39,6 +39,7 @@ Available commands are:
    members        Lists the members of a Consul cluster
    monitor        Stream logs from a Consul agent
    reload         Triggers the agent to reload configuration files
    rtt            Estimates network round trip time between nodes
    version        Prints the Consul version
    watch          Watch for changes in Consul
```

55
website/source/docs/commands/rtt.html.markdown
Normal file

@@ -0,0 +1,55 @@
---
layout: "docs"
page_title: "Commands: RTT"
sidebar_current: "docs-commands-rtt"
description: >
  The rtt command estimates the network round trip time between two nodes.
---

# Consul RTT

Command: `consul rtt`

The `rtt` command estimates the network round trip time between two nodes using
Consul's network coordinate model of the cluster.

See the [Network Coordinates](/docs/internals/coordinates.html) internals guide
for more information on how these coordinates are computed.

## Usage

Usage: `consul rtt [options] node1 [node2]`

At least one node name is required. If the second node name isn't given, it
is set to the agent's node name. Note that these are node names as known to
Consul as `consul members` would show, not IP addresses.

The list of available flags is:

* `-wan` - Instructs the command to use WAN coordinates instead of LAN
  coordinates. By default, the two nodes are assumed to be nodes in the local
  datacenter and the LAN coordinates are used. If the -wan option is given,
  then the WAN coordinates are used, and the node names must be suffixed by a period
  and the datacenter (e.g. "myserver.dc1"). It is not possible to measure between
  LAN coordinates and WAN coordinates, so both nodes must be in the same pool.

* `-http-addr` - Address to the HTTP server of the agent you want to contact
  to send this command. If this isn't specified, the command will contact
  "127.0.0.1:8500" which is the default HTTP address of a Consul agent.

## Output

If coordinates are available, the command will print the estimated round trip
time between the given nodes:

```
$ consul rtt n1 n2
Estimated n1 <-> n2 rtt: 0.610 ms (using LAN coordinates)

$ consul rtt n2 # Running from n1
Estimated n1 <-> n2 rtt: 0.610 ms (using LAN coordinates)

$ consul rtt -wan n1.dc1 n2.dc2
Estimated n1.dc1 <-> n2.dc2 rtt: 1.275 ms (using WAN coordinates)
```
101
website/source/docs/internals/coordinates.html.markdown
Normal file

@@ -0,0 +1,101 @@
---
layout: "docs"
page_title: "Network Coordinates"
sidebar_current: "docs-internals-coordinates"
description: |-
  Serf uses a network tomography system to compute network coordinates for nodes in the cluster. These coordinates are useful for easily calculating the estimated network round trip time between any two nodes in the cluster. This page documents the details of this system. The core of the network tomography system is based on Vivaldi: A Decentralized Network Coordinate System, with several improvements based on several follow-on papers.
---

# Network Coordinates

Consul uses a [network tomography](https://en.wikipedia.org/wiki/Network_tomography)
system to compute network coordinates for nodes in the cluster. These coordinates
allow the network round trip time to be estimated between any two nodes using
a very simple calculation. This allows for many useful applications, such as finding
the service node nearest a requesting node, or failing over to services in the next
closest datacenter.

All of this is provided through the use of the [Serf library](https://www.serfdom.io/).
Serf's network tomography is based on ["Vivaldi: A Decentralized Network Coordinate System"](http://www.cs.ucsb.edu/~ravenben/classes/276/papers/vivaldi-sigcomm04.pdf),
with some enhancements based on other research. There are more details about
[Serf's network coordinates here](https://www.serfdom.io/docs/internals/coordinates.html).

~> **Advanced Topic!** This page covers the technical details of
the internals of Consul. You don't need to know these details to effectively
operate and use Consul. These details are documented here for those who wish
to learn about them without having to go spelunking through the source code.

## Network Coordinates in Consul

Network coordinates manifest in several ways inside Consul:

* The [`consul rtt`](/docs/commands/rtt.html) command can be used to query for the
  network round trip time between any two nodes.

* The [Catalog endpoints](/docs/agent/http/catalog.html) and
  [Health endpoints](/docs/agent/http/health.html) can sort the results of queries based
  on the network round trip time from a given node using a "?near=" parameter.

* The [Coordinate endpoint](/docs/agent/http/coordinate.html) exposes raw network
  coordinates for use in other applications.

Consul uses Serf to manage two different gossip pools, one for the LAN with members
of a given datacenter, and one for the WAN which is made up of just the Consul servers
in all datacenters. It's important to note that **network coordinates are not compatible
between these two pools**. LAN coordinates only make sense in calculations with other
LAN coordinates, and WAN coordinates only make sense with other WAN coordinates.

## Working with Coordinates

Computing the estimated network round trip time between any two nodes is simple
once you have their coordinates. Here's a sample coordinate, as returned from the
[Coordinate endpoint](/docs/agent/http/coordinate.html).

```
"Coord": {
    "Adjustment": 0.1,
    "Error": 1.5,
    "Height": 0.02,
    "Vec": [0.34,0.68,0.003,0.01,0.05,0.1,0.34,0.06]
}
```

All values are floating point numbers in units of seconds, except for the error
term which isn't used for distance calculations.

Here's a complete example in Go showing how to compute the distance between two
coordinates:

```
import (
    "github.com/hashicorp/serf/coordinate"
    "math"
    "time"
)

func dist(a *coordinate.Coordinate, b *coordinate.Coordinate) time.Duration {
    // Coordinates will always have the same dimensionality, so this is
    // just a sanity check.
    if len(a.Vec) != len(b.Vec) {
        panic("dimensions aren't compatible")
    }

    // Calculate the Euclidean distance plus the heights.
    sumsq := 0.0
    for i := 0; i < len(a.Vec); i++ {
        diff := a.Vec[i] - b.Vec[i]
        sumsq += diff * diff
    }
    rtt := math.Sqrt(sumsq) + a.Height + b.Height

    // Apply the adjustment components, guarding against negatives.
    adjusted := rtt + a.Adjustment + b.Adjustment
    if adjusted > 0.0 {
        rtt = adjusted
    }

    // Go's times are natively nanoseconds, so we convert from seconds.
    const secondsToNanoseconds = 1.0e9
    return time.Duration(rtt * secondsToNanoseconds)
}
```
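
To put this to use, one could fetch raw coordinates from the
[Coordinate endpoint](/docs/agent/http/coordinate.html) and feed them to
`dist`. A brief sketch using the Go `api` client, which assumes the `dist`
function from the example above (the node-lookup logic is an illustrative
assumption):

```go
package example

import (
	"fmt"
	"time"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/serf/coordinate"
)

// estimateRTT fetches LAN coordinates from the agent and applies dist()
// from the example above to two named nodes.
func estimateRTT(client *api.Client, node1, node2 string) (time.Duration, error) {
	entries, _, err := client.Coordinate().Nodes(nil)
	if err != nil {
		return 0, err
	}
	coords := make(map[string]*coordinate.Coordinate)
	for _, e := range entries {
		coords[e.Node] = e.Coord
	}
	a, ok1 := coords[node1]
	b, ok2 := coords[node2]
	if !ok1 || !ok2 {
		return 0, fmt.Errorf("missing coordinates for %q or %q", node1, node2)
	}
	return dist(a, b), nil
}
```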

@@ -38,6 +38,10 @@
        <a href="/docs/internals/gossip.html">Gossip Protocol</a>
      </li>

      <li<%= sidebar_current("docs-internals-coordinates") %>>
        <a href="/docs/internals/coordinates.html">Network Coordinates</a>
      </li>

      <li<%= sidebar_current("docs-internals-sessions") %>>
        <a href="/docs/internals/sessions.html">Sessions</a>
      </li>

@@ -96,8 +100,7 @@
      </li>

      <li<%= sidebar_current("docs-commands-leave") %>>
        <a href="/docs/commands/leave.html">leave</a>
      </li>
        <a href="/docs/commands/leave.html">leave</a></li>

      <li<%= sidebar_current("docs-commands-lock") %>>
        <a href="/docs/commands/lock.html">lock</a>

@@ -123,6 +126,10 @@
        <a href="/docs/commands/reload.html">reload</a>
      </li>

      <li<%= sidebar_current("docs-commands-rtt") %>>
        <a href="/docs/commands/rtt.html">rtt</a>
      </li>

      <li<%= sidebar_current("docs-commands-watch") %>>
        <a href="/docs/commands/watch.html">watch</a>
      </li>

@@ -143,8 +150,8 @@
      <li<%= sidebar_current("docs-agent-http") %>>
        <a href="/docs/agent/http.html">HTTP API</a>
        <ul class="subnav">
          <li<%= sidebar_current("docs-agent-http-kv") %>>
            <a href="/docs/agent/http/kv.html">Key/Value store</a>
          <li<%= sidebar_current("docs-agent-http-acl") %>>
            <a href="/docs/agent/http/acl.html">ACLs</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-agent") %>>

@@ -155,20 +162,24 @@
            <a href="/docs/agent/http/catalog.html">Catalog</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-session") %>>
            <a href="/docs/agent/http/session.html">Sessions</a>
          <li<%= sidebar_current("docs-agent-http-event") %>>
            <a href="/docs/agent/http/event.html">Events</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-health") %>>
            <a href="/docs/agent/http/health.html">Health Checks</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-acl") %>>
            <a href="/docs/agent/http/acl.html">ACLs</a>
          <li<%= sidebar_current("docs-agent-http-kv") %>>
            <a href="/docs/agent/http/kv.html">Key/Value store</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-event") %>>
            <a href="/docs/agent/http/event.html">Events</a>
          <li<%= sidebar_current("docs-agent-http-coordinate") %>>
            <a href="/docs/agent/http/coordinate.html">Network Coordinates</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-session") %>>
            <a href="/docs/agent/http/session.html">Sessions</a>
          </li>

          <li<%= sidebar_current("docs-agent-http-status") %>>