open-nomad/client/client.go

375 lines
9.0 KiB
Go
Raw Normal View History

2015-08-20 22:25:09 +00:00
package client
import (
2015-08-20 23:07:26 +00:00
"fmt"
2015-08-20 22:25:09 +00:00
"io"
"log"
2015-08-20 23:07:26 +00:00
"net"
2015-08-20 22:25:09 +00:00
"os"
2015-08-20 23:07:26 +00:00
"strconv"
2015-08-20 22:25:09 +00:00
"sync"
2015-08-20 23:07:26 +00:00
"time"
2015-08-20 23:53:43 +00:00
"github.com/hashicorp/nomad/client/driver"
2015-08-20 23:41:29 +00:00
"github.com/hashicorp/nomad/client/fingerprint"
2015-08-20 23:07:26 +00:00
"github.com/hashicorp/nomad/nomad"
2015-08-20 23:41:29 +00:00
"github.com/hashicorp/nomad/nomad/structs"
2015-08-20 22:25:09 +00:00
)
2015-08-20 23:07:26 +00:00
const (
	// clientRPCCache is the window during which the last-used server is
	// reused for RPCs; it is also handed to the connection pool when the
	// client is created.
	clientRPCCache = 30 * time.Second

	// clientMaxStreams controls how many idle streams we keep
	// open to a server.
	clientMaxStreams = 2

	// registerRetryIntv is the minimum interval between registration
	// retries. The actual wait is picked between this value and 2x this
	// value.
	registerRetryIntv = 30 * time.Second
)
// RPCHandler lets a co-located server answer the Client's RPCs directly
// instead of going over the network. When no handler is supplied, the
// Client maintains its own connection pool to the servers.
type RPCHandler interface {
	// RPC invokes the named method with args and fills in reply.
	RPC(method string, args, reply interface{}) error
}
2015-08-20 22:25:09 +00:00
// Config is used to parameterize and configure the behavior of the client
type Config struct {
// LogOutput is the destination for logs
LogOutput io.Writer
2015-08-20 23:07:26 +00:00
// Region is the clients region
Region string
// Servers is a list of known server addresses. These are as "host:port"
Servers []string
// RPCHandler can be provided to avoid network traffic if the
// server is running locally.
RPCHandler RPCHandler
2015-08-20 23:41:29 +00:00
// Node provides the base node
Node *structs.Node
2015-08-20 22:25:09 +00:00
}
// DefaultConfig returns the default configuration
func DefaultConfig() *Config {
return &Config{
LogOutput: os.Stderr,
2015-08-21 00:49:04 +00:00
Region: "region1",
2015-08-20 22:25:09 +00:00
}
}
// Client is used to implement the client interaction with Nomad. Clients
// are expected to register as a schedulable node to the servers, and to
// run allocations as determined by the servers.
type Client struct {
2015-08-20 23:07:26 +00:00
config *Config
2015-08-20 22:25:09 +00:00
logger *log.Logger
2015-08-20 23:07:26 +00:00
lastServer net.Addr
lastRPCTime time.Time
lastServerLock sync.Mutex
connPool *nomad.ConnPool
2015-08-23 01:16:05 +00:00
lastHeartbeat time.Time
heartbeatTTL time.Duration
2015-08-20 22:25:09 +00:00
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
// NewClient is used to create a new client from the given configuration
func NewClient(config *Config) (*Client, error) {
// Create a logger
logger := log.New(config.LogOutput, "", log.LstdFlags)
2015-08-20 23:41:29 +00:00
// Create the client
2015-08-20 22:25:09 +00:00
c := &Client{
2015-08-20 23:07:26 +00:00
config: config,
connPool: nomad.NewPool(config.LogOutput, clientRPCCache, clientMaxStreams, nil),
2015-08-20 22:25:09 +00:00
logger: logger,
shutdownCh: make(chan struct{}),
}
2015-08-20 23:41:29 +00:00
// Setup the node
if err := c.setupNode(); err != nil {
return nil, fmt.Errorf("node setup failed: %v", err)
}
// Fingerprint the node
if err := c.fingerprint(); err != nil {
return nil, fmt.Errorf("fingerprinting failed: %v", err)
}
2015-08-20 23:53:43 +00:00
// Scan for drivers
if err := c.setupDrivers(); err != nil {
return nil, fmt.Errorf("driver setup failed: %v", err)
}
2015-08-21 00:49:04 +00:00
// Start the client!
go c.run()
2015-08-20 22:25:09 +00:00
return c, nil
}
// Shutdown is used to tear down the client
func (c *Client) Shutdown() error {
2015-08-20 23:07:26 +00:00
c.logger.Printf("[INFO] client: shutting down")
2015-08-20 22:25:09 +00:00
c.shutdownLock.Lock()
defer c.shutdownLock.Unlock()
if c.shutdown {
return nil
}
c.shutdown = true
close(c.shutdownCh)
2015-08-21 00:49:04 +00:00
c.connPool.Shutdown()
2015-08-20 22:25:09 +00:00
return nil
}
2015-08-20 23:07:26 +00:00
// RPC forwards a call to a Nomad server. A configured RPCHandler takes
// precedence; otherwise a server is picked and the request is sent through
// the connection pool. An error is returned if no server can be picked or
// if the call itself fails.
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Short-circuit through the local handler when one is configured
	if handler := c.config.RPCHandler; handler != nil {
		return handler.RPC(method, args, reply)
	}

	// Choose the server to talk to
	target, err := c.pickServer()
	if err != nil {
		return err
	}

	// Issue the request
	rpcErr := c.connPool.RPC(c.config.Region, target, 1, method, args, reply)

	// Record the outcome: remember the server on success, forget it on failure
	c.lastServerLock.Lock()
	defer c.lastServerLock.Unlock()
	if rpcErr == nil {
		c.lastServer = target
		c.lastRPCTime = time.Now()
	} else {
		c.lastServer = nil
		c.lastRPCTime = time.Time{}
	}
	return rpcErr
}
// pickServer is used to pick a target RPC server.
//
// The last-used server is returned as long as it was used less than
// clientRPCCache ago. Otherwise the configured servers are copied, shuffled,
// and resolved one by one; the first address that resolves is remembered
// as the last-used server and returned.
//
// An error is returned when no servers are configured or when none of the
// configured addresses can be resolved.
func (c *Client) pickServer() (net.Addr, error) {
	c.lastServerLock.Lock()
	defer c.lastServerLock.Unlock()

	// Check for a valid last-used server
	if c.lastServer != nil && time.Since(c.lastRPCTime) < clientRPCCache {
		return c.lastServer, nil
	}

	// Bail if we can't find any servers
	if len(c.config.Servers) == 0 {
		return nil, fmt.Errorf("no known servers")
	}

	// Copy the list of servers and shuffle, so the configured slice is
	// never reordered in place
	servers := make([]string, len(c.config.Servers))
	copy(servers, c.config.Servers)
	shuffleStrings(servers)

	// Try to resolve each server
	for _, server := range servers {
		addr, err := net.ResolveTCPAddr("tcp", server)
		if err == nil {
			c.lastServer = addr
			c.lastRPCTime = time.Now()
			return addr, nil
		}
		// Log both the address and the error; the format has two verbs
		c.logger.Printf("[WARN] client: failed to resolve '%s': %v", server, err)
	}

	// Bail if we reach this point
	return nil, fmt.Errorf("failed to resolve any servers")
}
// Stats reports statistics for debugging and insight, grouped by
// sub-system: a "nomad" section describing this client and a "runtime"
// section supplied by nomad.RuntimeStats.
func (c *Client) Stats() map[string]map[string]string {
	knownServers := uint64(len(c.config.Servers))
	clientStats := map[string]string{
		"server":        "false",
		"known_servers": strconv.FormatUint(knownServers, 10),
	}
	return map[string]map[string]string{
		"nomad":   clientStats,
		"runtime": nomad.RuntimeStats(),
	}
}
2015-08-20 23:41:29 +00:00
// Node hands back the node held in the client's configuration, i.e. the
// node this client registers with the servers.
func (c *Client) Node() *structs.Node {
	cfg := c.config
	return cfg.Node
}
// setupNode is used to setup the initial node
func (c *Client) setupNode() error {
node := c.config.Node
if node == nil {
node = &structs.Node{}
c.config.Node = node
}
if node.Attributes == nil {
node.Attributes = make(map[string]string)
}
if node.Links == nil {
node.Links = make(map[string]string)
}
if node.Meta == nil {
node.Meta = make(map[string]string)
}
2015-08-21 00:49:04 +00:00
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
if node.ID == "" {
node.ID = generateUUID()
}
if node.Datacenter == "" {
node.Datacenter = "dc1"
}
if node.Name == "" {
node.Name, _ = os.Hostname()
}
if node.Name == "" {
node.Name = node.ID
}
node.Status = structs.NodeStatusInit
2015-08-20 23:41:29 +00:00
return nil
}
// fingerprint runs every builtin fingerprinter against the configured node
// and logs the names of those that applied. The first error encountered
// while creating or running a fingerprinter is returned.
func (c *Client) fingerprint() error {
	var matched []string
	for fpName := range fingerprint.BuiltinFingerprints {
		fp, err := fingerprint.NewFingerprint(fpName, c.logger)
		if err != nil {
			return err
		}

		ok, err := fp.Fingerprint(c.config.Node)
		switch {
		case err != nil:
			return err
		case ok:
			matched = append(matched, fpName)
		}
	}
	c.logger.Printf("[DEBUG] client: applied fingerprints %v", matched)
	return nil
}
2015-08-20 23:53:43 +00:00
// setupDrivers discovers the available drivers by fingerprinting the
// configured node with each builtin driver, then logs the names of those
// that applied. The first error encountered while creating or
// fingerprinting a driver is returned.
func (c *Client) setupDrivers() error {
	var usable []string
	for drvName := range driver.BuiltinDrivers {
		drv, err := driver.NewDriver(drvName, c.logger)
		if err != nil {
			return err
		}

		ok, err := drv.Fingerprint(c.config.Node)
		switch {
		case err != nil:
			return err
		case ok:
			usable = append(usable, drvName)
		}
	}
	c.logger.Printf("[DEBUG] client: available drivers %v", usable)
	return nil
}
2015-08-21 00:49:04 +00:00
// run is a long lived goroutine used to run the client
func (c *Client) run() {
// Register the client
for {
if err := c.registerNode(); err == nil {
break
}
select {
case <-time.After(registerRetryIntv + randomStagger(registerRetryIntv)):
case <-c.shutdownCh:
return
}
}
2015-08-23 01:16:05 +00:00
// Setup the heartbeat timer
heartbeat := time.After(c.heartbeatTTL)
// TODO Watch for changes in allocations
2015-08-21 00:49:04 +00:00
2015-08-23 01:16:05 +00:00
// Periodically update our status and wait for termination
2015-08-21 00:49:04 +00:00
select {
2015-08-23 01:16:05 +00:00
case <-heartbeat:
if err := c.updateNodeStatus(); err != nil {
heartbeat = time.After(registerRetryIntv)
} else {
heartbeat = time.After(c.heartbeatTTL)
}
2015-08-21 00:49:04 +00:00
case <-c.shutdownCh:
return
}
}
// registerNode is used to register the node or update the registration
func (c *Client) registerNode() error {
node := c.Node()
req := structs.NodeRegisterRequest{
Node: node,
WriteRequest: structs.WriteRequest{Region: c.config.Region},
}
var resp structs.NodeUpdateResponse
err := c.RPC("Client.Register", &req, &resp)
if err != nil {
c.logger.Printf("[ERR] client: failed to register node: %v", err)
return err
}
c.logger.Printf("[DEBUG] client: node registration complete")
if len(resp.EvalIDs) != 0 {
c.logger.Printf("[DEBUG] client: %d evaluations triggered by node registration", len(resp.EvalIDs))
}
2015-08-23 01:16:05 +00:00
c.lastHeartbeat = time.Now()
c.heartbeatTTL = resp.HeartbeatTTL
return nil
}
// updateNodeStatus is used to heartbeat and update the status of the node
func (c *Client) updateNodeStatus() error {
node := c.Node()
req := structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: c.config.Region},
}
var resp structs.NodeUpdateResponse
err := c.RPC("Client.UpdateStatus", &req, &resp)
if err != nil {
c.logger.Printf("[ERR] client: failed to update status: %v", err)
return err
}
if len(resp.EvalIDs) != 0 {
c.logger.Printf("[DEBUG] client: %d evaluations triggered by node update", len(resp.EvalIDs))
}
if resp.Index != 0 {
c.logger.Printf("[DEBUG] client: client state updated")
}
c.lastHeartbeat = time.Now()
c.heartbeatTTL = resp.HeartbeatTTL
2015-08-21 00:49:04 +00:00
return nil
}