2013-12-19 22:48:14 +00:00
|
|
|
package consul
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2016-10-26 02:20:24 +00:00
|
|
|
"io"
|
2013-12-19 22:48:14 +00:00
|
|
|
"log"
|
|
|
|
"os"
|
2014-02-24 00:37:33 +00:00
|
|
|
"strconv"
|
2013-12-19 22:48:14 +00:00
|
|
|
"sync"
|
2013-12-19 23:42:17 +00:00
|
|
|
"time"
|
2015-01-06 23:48:46 +00:00
|
|
|
|
2017-09-01 22:02:50 +00:00
|
|
|
"github.com/armon/go-metrics"
|
2017-06-15 13:16:16 +00:00
|
|
|
"github.com/hashicorp/consul/agent/pool"
|
2017-07-06 10:40:54 +00:00
|
|
|
"github.com/hashicorp/consul/agent/router"
|
2017-07-06 10:34:00 +00:00
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
2017-01-18 06:20:11 +00:00
|
|
|
"github.com/hashicorp/consul/lib"
|
2015-01-06 23:48:46 +00:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2017-09-01 22:02:50 +00:00
|
|
|
"golang.org/x/time/rate"
|
2013-12-19 22:48:14 +00:00
|
|
|
)
|
|
|
|
|
2014-05-27 21:33:09 +00:00
|
|
|
const (
	// clientRPCConnMaxIdle controls how long an idle connection to a
	// server is kept open. 127s is the first prime above 120s (using a
	// prime was an arbitrary choice). The intent is that connections used
	// by once-a-minute cron(8) jobs can be reused even when the job runs
	// with a 60s jitter window: in vixie cron, job execution can drift by
	// up to 59s per job, i.e. up to 119s between runs of a once-a-minute
	// job.
	clientRPCConnMaxIdle = 127 * time.Second

	// clientMaxStreams controls how many idle streams are kept open to a
	// server.
	clientMaxStreams = 32

	// serfEventBacklog is the maximum number of unprocessed Serf events
	// held in the queue before new Serf events block. A blocking Serf
	// event queue is a bad thing.
	serfEventBacklog = 256

	// serfEventBacklogWarning is the backlog threshold at which warnings
	// are logged to indicate a problem processing Serf events.
	serfEventBacklogWarning = 200
)
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// Client is Consul client which uses RPC to communicate with the
|
|
|
|
// services for service discovery, health checking, and DC forwarding.
|
|
|
|
type Client struct {
|
|
|
|
config *Config
|
|
|
|
|
|
|
|
// Connection pool to consul servers
|
2017-06-15 13:16:16 +00:00
|
|
|
connPool *pool.ConnPool
|
2013-12-19 22:48:14 +00:00
|
|
|
|
2017-07-06 10:40:54 +00:00
|
|
|
// routers is responsible for the selection and maintenance of
|
2016-03-25 18:57:54 +00:00
|
|
|
// Consul servers this agent uses for RPC requests
|
2017-07-06 10:40:54 +00:00
|
|
|
routers *router.Manager
|
2013-12-19 22:48:14 +00:00
|
|
|
|
2017-09-01 22:02:50 +00:00
|
|
|
// rpcLimiter is used to rate limit the total number of RPCs initiated
|
|
|
|
// from an agent.
|
|
|
|
rpcLimiter *rate.Limiter
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// eventCh is used to receive events from the
|
|
|
|
// serf cluster in the datacenter
|
|
|
|
eventCh chan serf.Event
|
|
|
|
|
|
|
|
// Logger uses the provided LogOutput
|
|
|
|
logger *log.Logger
|
|
|
|
|
|
|
|
// serf is the Serf cluster maintained inside the DC
|
|
|
|
// which contains all the DC nodes
|
|
|
|
serf *serf.Serf
|
|
|
|
|
|
|
|
shutdown bool
|
|
|
|
shutdownCh chan struct{}
|
|
|
|
shutdownLock sync.Mutex
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewClient is used to construct a new Consul client from the
|
|
|
|
// configuration, potentially returning an error
|
|
|
|
func NewClient(config *Config) (*Client, error) {
|
2017-05-31 09:05:02 +00:00
|
|
|
return NewClientLogger(config, nil)
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewClientLogger(config *Config, logger *log.Logger) (*Client, error) {
|
2014-03-09 22:18:36 +00:00
|
|
|
// Check the protocol version
|
2017-05-03 19:02:01 +00:00
|
|
|
if err := config.CheckProtocolVersion(); err != nil {
|
2014-03-09 22:18:36 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// Check for a data directory!
|
|
|
|
if config.DataDir == "" {
|
|
|
|
return nil, fmt.Errorf("Config must provide a DataDir")
|
|
|
|
}
|
|
|
|
|
2014-08-05 22:20:35 +00:00
|
|
|
// Sanity check the ACLs
|
|
|
|
if err := config.CheckACL(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// Ensure we have a log output
|
|
|
|
if config.LogOutput == nil {
|
|
|
|
config.LogOutput = os.Stderr
|
|
|
|
}
|
|
|
|
|
2015-05-11 22:15:36 +00:00
|
|
|
// Create the tls Wrapper
|
|
|
|
tlsWrap, err := config.tlsConfig().OutgoingTLSWrapper()
|
|
|
|
if err != nil {
|
2014-06-22 19:49:51 +00:00
|
|
|
return nil, err
|
2014-04-04 23:28:14 +00:00
|
|
|
}
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// Create a logger
|
2017-05-31 09:05:02 +00:00
|
|
|
if logger == nil {
|
|
|
|
logger = log.New(config.LogOutput, "", log.LstdFlags)
|
|
|
|
}
|
2013-12-19 22:48:14 +00:00
|
|
|
|
2017-06-15 13:16:16 +00:00
|
|
|
connPool := &pool.ConnPool{
|
|
|
|
SrcAddr: config.RPCSrcAddr,
|
|
|
|
LogOutput: config.LogOutput,
|
|
|
|
MaxTime: clientRPCConnMaxIdle,
|
|
|
|
MaxStreams: clientMaxStreams,
|
|
|
|
TLSWrapper: tlsWrap,
|
|
|
|
ForceTLS: config.VerifyOutgoing,
|
|
|
|
}
|
|
|
|
|
2017-09-01 22:02:50 +00:00
|
|
|
// Create client
|
2013-12-19 22:48:14 +00:00
|
|
|
c := &Client{
|
|
|
|
config: config,
|
2017-06-15 13:16:16 +00:00
|
|
|
connPool: connPool,
|
2017-09-01 22:02:50 +00:00
|
|
|
rpcLimiter: rate.NewLimiter(config.RPCRate, config.RPCMaxBurst),
|
2016-02-19 01:46:02 +00:00
|
|
|
eventCh: make(chan serf.Event, serfEventBacklog),
|
2013-12-19 22:48:14 +00:00
|
|
|
logger: logger,
|
|
|
|
shutdownCh: make(chan struct{}),
|
|
|
|
}
|
|
|
|
|
2016-03-27 04:59:45 +00:00
|
|
|
// Start lan event handlers before lan Serf setup to prevent deadlock
|
2013-12-19 22:48:14 +00:00
|
|
|
go c.lanEventHandler()
|
|
|
|
|
|
|
|
// Initialize the lan Serf
|
|
|
|
c.serf, err = c.setupSerf(config.SerfLANConfig,
|
|
|
|
c.eventCh, serfLANSnapshot)
|
|
|
|
if err != nil {
|
|
|
|
c.Shutdown()
|
|
|
|
return nil, fmt.Errorf("Failed to start lan serf: %v", err)
|
|
|
|
}
|
2016-03-27 04:59:45 +00:00
|
|
|
|
2016-03-29 22:58:15 +00:00
|
|
|
// Start maintenance task for servers
|
2017-07-06 10:40:54 +00:00
|
|
|
c.routers = router.New(c.logger, c.shutdownCh, c.serf, c.connPool)
|
|
|
|
go c.routers.Start()
|
2016-03-27 04:59:45 +00:00
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
return c, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shutdown is used to shutdown the client
|
|
|
|
func (c *Client) Shutdown() error {
|
2014-01-10 19:06:11 +00:00
|
|
|
c.logger.Printf("[INFO] consul: shutting down client")
|
2013-12-19 22:48:14 +00:00
|
|
|
c.shutdownLock.Lock()
|
|
|
|
defer c.shutdownLock.Unlock()
|
|
|
|
|
|
|
|
if c.shutdown {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
c.shutdown = true
|
|
|
|
close(c.shutdownCh)
|
|
|
|
|
|
|
|
if c.serf != nil {
|
|
|
|
c.serf.Shutdown()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close the connection pool
|
|
|
|
c.connPool.Shutdown()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Leave is used to prepare for a graceful shutdown
|
|
|
|
func (c *Client) Leave() error {
|
2014-01-10 19:06:11 +00:00
|
|
|
c.logger.Printf("[INFO] consul: client starting leave")
|
2013-12-19 22:48:14 +00:00
|
|
|
|
|
|
|
// Leave the LAN pool
|
|
|
|
if c.serf != nil {
|
|
|
|
if err := c.serf.Leave(); err != nil {
|
2014-01-10 19:06:11 +00:00
|
|
|
c.logger.Printf("[ERR] consul: Failed to leave LAN Serf cluster: %v", err)
|
2013-12-19 22:48:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// JoinLAN is used to have Consul client join the inner-DC pool
|
|
|
|
// The target address should be another node inside the DC
|
|
|
|
// listening on the Serf LAN address
|
2013-12-30 20:20:17 +00:00
|
|
|
func (c *Client) JoinLAN(addrs []string) (int, error) {
|
2014-02-21 00:27:03 +00:00
|
|
|
return c.serf.Join(addrs, true)
|
2013-12-19 22:48:14 +00:00
|
|
|
}
|
|
|
|
|
2014-05-29 18:21:56 +00:00
|
|
|
// LocalMember is used to return the local node
|
|
|
|
func (c *Client) LocalMember() serf.Member {
|
|
|
|
return c.serf.LocalMember()
|
|
|
|
}
|
|
|
|
|
2013-12-19 22:48:14 +00:00
|
|
|
// LANMembers is used to return the members of the LAN cluster
|
|
|
|
func (c *Client) LANMembers() []serf.Member {
|
|
|
|
return c.serf.Members()
|
|
|
|
}
|
|
|
|
|
2017-09-05 19:22:20 +00:00
|
|
|
// LANMembersAllSegments returns members from all segments.
|
|
|
|
func (c *Client) LANMembersAllSegments() ([]serf.Member, error) {
|
|
|
|
return c.serf.Members(), nil
|
|
|
|
}
|
|
|
|
|
2017-08-14 14:36:07 +00:00
|
|
|
// LANSegmentMembers only returns our own segment's members, because clients
|
|
|
|
// can't be in multiple segments.
|
2017-08-30 23:44:04 +00:00
|
|
|
func (c *Client) LANSegmentMembers(segment string) ([]serf.Member, error) {
|
|
|
|
if segment == c.config.Segment {
|
2017-08-14 14:36:07 +00:00
|
|
|
return c.LANMembers(), nil
|
|
|
|
}
|
|
|
|
|
2017-08-30 23:44:04 +00:00
|
|
|
return nil, fmt.Errorf("segment %q not found", segment)
|
2017-08-14 14:36:07 +00:00
|
|
|
}
|
|
|
|
|
2013-12-30 22:42:23 +00:00
|
|
|
// RemoveFailedNode is used to remove a failed node from the cluster
|
|
|
|
func (c *Client) RemoveFailedNode(node string) error {
|
|
|
|
return c.serf.RemoveFailedNode(node)
|
|
|
|
}
|
|
|
|
|
2014-11-20 00:45:49 +00:00
|
|
|
// KeyManagerLAN returns the LAN Serf keyring manager
|
|
|
|
func (c *Client) KeyManagerLAN() *serf.KeyManager {
|
|
|
|
return c.serf.KeyManager()
|
|
|
|
}
|
|
|
|
|
2014-10-04 02:20:58 +00:00
|
|
|
// Encrypted determines if gossip is encrypted
|
|
|
|
func (c *Client) Encrypted() bool {
|
|
|
|
return c.serf.EncryptionEnabled()
|
|
|
|
}
|
|
|
|
|
2013-12-19 23:08:55 +00:00
|
|
|
// RPC is used to forward an RPC call to a consul server, or fail if no servers
|
|
|
|
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
|
2017-10-10 22:19:50 +00:00
|
|
|
// This is subtle but we start measuring the time on the client side
|
|
|
|
// right at the time of the first request, vs. on the first retry as
|
|
|
|
// is done on the server side inside forward(). This is because the
|
|
|
|
// servers may already be applying the RPCHoldTimeout up there, so by
|
|
|
|
// starting the timer here we won't potentially double up the delay.
|
|
|
|
// TODO (slackpad) Plumb a deadline here with a context.
|
|
|
|
firstCheck := time.Now()
|
|
|
|
|
|
|
|
TRY:
|
2017-07-06 10:40:54 +00:00
|
|
|
server := c.routers.FindServer()
|
2016-02-19 21:17:52 +00:00
|
|
|
if server == nil {
|
|
|
|
return structs.ErrNoServers
|
|
|
|
}
|
|
|
|
|
2017-09-01 22:02:50 +00:00
|
|
|
// Enforce the RPC limit.
|
|
|
|
metrics.IncrCounter([]string{"consul", "client", "rpc"}, 1)
|
2017-10-04 23:43:27 +00:00
|
|
|
metrics.IncrCounter([]string{"client", "rpc"}, 1)
|
2017-09-01 22:02:50 +00:00
|
|
|
if !c.rpcLimiter.Allow() {
|
|
|
|
metrics.IncrCounter([]string{"consul", "client", "rpc", "exceeded"}, 1)
|
2017-10-04 23:43:27 +00:00
|
|
|
metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1)
|
2017-09-01 22:02:50 +00:00
|
|
|
return structs.ErrRPCRateExceeded
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the request.
|
2017-10-10 22:19:50 +00:00
|
|
|
rpcErr := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, server.UseTLS, args, reply)
|
|
|
|
if rpcErr == nil {
|
|
|
|
return nil
|
2014-02-03 19:53:04 +00:00
|
|
|
}
|
|
|
|
|
2017-10-10 22:19:50 +00:00
|
|
|
// Move off to another server, and see if we can retry.
|
|
|
|
c.logger.Printf("[ERR] consul: %q RPC failed to server %s: %v", method, server.Addr, rpcErr)
|
|
|
|
c.routers.NotifyFailedServer(server)
|
|
|
|
if retry := canRetry(args, rpcErr); !retry {
|
|
|
|
return rpcErr
|
|
|
|
}
|
|
|
|
|
|
|
|
// We can wait a bit and retry!
|
2017-10-17 18:38:24 +00:00
|
|
|
if time.Since(firstCheck) < c.config.RPCHoldTimeout {
|
2017-10-10 22:19:50 +00:00
|
|
|
jitter := lib.RandomStagger(c.config.RPCHoldTimeout / jitterFraction)
|
|
|
|
select {
|
|
|
|
case <-time.After(jitter):
|
|
|
|
goto TRY
|
|
|
|
case <-c.shutdownCh:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return rpcErr
|
2013-12-19 23:08:55 +00:00
|
|
|
}
|
2014-02-24 00:37:33 +00:00
|
|
|
|
2016-10-26 02:20:24 +00:00
|
|
|
// SnapshotRPC sends the snapshot request to one of the servers, reading from
|
|
|
|
// the streaming input and writing to the streaming output depending on the
|
|
|
|
// operation.
|
|
|
|
func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
|
2017-06-15 09:50:28 +00:00
|
|
|
replyFn structs.SnapshotReplyFn) error {
|
2017-07-06 10:40:54 +00:00
|
|
|
server := c.routers.FindServer()
|
2016-10-26 02:20:24 +00:00
|
|
|
if server == nil {
|
|
|
|
return structs.ErrNoServers
|
|
|
|
}
|
|
|
|
|
2017-09-01 22:02:50 +00:00
|
|
|
// Enforce the RPC limit.
|
|
|
|
metrics.IncrCounter([]string{"consul", "client", "rpc"}, 1)
|
2017-10-04 23:43:27 +00:00
|
|
|
metrics.IncrCounter([]string{"client", "rpc"}, 1)
|
2017-09-01 22:02:50 +00:00
|
|
|
if !c.rpcLimiter.Allow() {
|
|
|
|
metrics.IncrCounter([]string{"consul", "client", "rpc", "exceeded"}, 1)
|
2017-10-04 23:43:27 +00:00
|
|
|
metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1)
|
2017-09-01 22:02:50 +00:00
|
|
|
return structs.ErrRPCRateExceeded
|
|
|
|
}
|
|
|
|
|
2016-10-26 02:20:24 +00:00
|
|
|
// Request the operation.
|
|
|
|
var reply structs.SnapshotResponse
|
2017-05-10 21:25:48 +00:00
|
|
|
snap, err := SnapshotRPC(c.connPool, c.config.Datacenter, server.Addr, server.UseTLS, args, in, &reply)
|
2016-10-26 02:20:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err := snap.Close(); err != nil {
|
|
|
|
c.logger.Printf("[WARN] consul: Failed closing snapshot stream: %v", err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// Let the caller peek at the reply.
|
|
|
|
if replyFn != nil {
|
|
|
|
if err := replyFn(&reply); err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stream the snapshot.
|
|
|
|
if out != nil {
|
|
|
|
if _, err := io.Copy(out, snap); err != nil {
|
|
|
|
return fmt.Errorf("failed to stream snapshot: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-02-24 00:37:33 +00:00
|
|
|
// Stats is used to return statistics for debugging and insight
|
|
|
|
// for various sub-systems
|
|
|
|
func (c *Client) Stats() map[string]map[string]string {
|
2017-07-06 10:40:54 +00:00
|
|
|
numServers := c.routers.NumServers()
|
2016-02-19 20:13:17 +00:00
|
|
|
|
2014-02-24 00:37:33 +00:00
|
|
|
toString := func(v uint64) string {
|
|
|
|
return strconv.FormatUint(v, 10)
|
|
|
|
}
|
|
|
|
stats := map[string]map[string]string{
|
|
|
|
"consul": map[string]string{
|
2014-02-24 02:08:58 +00:00
|
|
|
"server": "false",
|
2016-02-20 01:32:16 +00:00
|
|
|
"known_servers": toString(uint64(numServers)),
|
2014-02-24 00:37:33 +00:00
|
|
|
},
|
2014-03-09 22:46:03 +00:00
|
|
|
"serf_lan": c.serf.Stats(),
|
2014-04-29 17:55:42 +00:00
|
|
|
"runtime": runtimeStats(),
|
2014-02-24 00:37:33 +00:00
|
|
|
}
|
|
|
|
return stats
|
|
|
|
}
|
2015-04-15 23:12:45 +00:00
|
|
|
|
2017-05-15 14:05:17 +00:00
|
|
|
// GetLANCoordinate returns the network coordinate of the current node, as
|
2015-06-06 03:31:33 +00:00
|
|
|
// maintained by Serf.
|
2017-08-14 14:36:07 +00:00
|
|
|
func (c *Client) GetLANCoordinate() (lib.CoordinateSet, error) {
|
|
|
|
lan, err := c.serf.GetCoordinate()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cs := lib.CoordinateSet{c.config.Segment: lan}
|
|
|
|
return cs, nil
|
2015-04-15 23:12:45 +00:00
|
|
|
}
|