2015-08-20 22:25:09 +00:00
|
|
|
package client
|
|
|
|
|
|
|
|
import (
|
2015-08-20 23:07:26 +00:00
|
|
|
"fmt"
|
2015-08-30 01:16:49 +00:00
|
|
|
"io/ioutil"
|
2015-08-20 22:25:09 +00:00
|
|
|
"log"
|
2015-08-20 23:07:26 +00:00
|
|
|
"net"
|
2015-08-20 22:25:09 +00:00
|
|
|
"os"
|
2015-08-30 01:16:49 +00:00
|
|
|
"path/filepath"
|
2015-08-20 23:07:26 +00:00
|
|
|
"strconv"
|
2015-08-20 22:25:09 +00:00
|
|
|
"sync"
|
2015-08-20 23:07:26 +00:00
|
|
|
"time"
|
|
|
|
|
2015-08-30 01:16:49 +00:00
|
|
|
"github.com/hashicorp/go-multierror"
|
2015-08-25 23:21:29 +00:00
|
|
|
"github.com/hashicorp/nomad/client/config"
|
2015-08-20 23:53:43 +00:00
|
|
|
"github.com/hashicorp/nomad/client/driver"
|
2015-08-20 23:41:29 +00:00
|
|
|
"github.com/hashicorp/nomad/client/fingerprint"
|
2015-08-20 23:07:26 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad"
|
2015-08-20 23:41:29 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2015-08-20 22:25:09 +00:00
|
|
|
)
|
|
|
|
|
2015-08-20 23:07:26 +00:00
|
|
|
const (
	// clientRPCCache controls how long we keep an idle connection
	// open to a server. It is also the window during which pickServer
	// keeps reusing the last server that was used.
	clientRPCCache = 30 * time.Second

	// clientMaxStreams controls how many idle streams we keep
	// open to a server
	clientMaxStreams = 2

	// registerRetryIntv is the minimum interval on which we retry
	// registration. We pick a value between this and 2x this.
	registerRetryIntv = 15 * time.Second

	// getAllocRetryIntv is the minimum interval on which we retry
	// to fetch allocations. We pick a value between this and 2x this.
	getAllocRetryIntv = 30 * time.Second

	// devModeRetryIntv is the retry interval used for development
	devModeRetryIntv = time.Second

	// stateSnapshotIntv is how often the client snapshots state
	stateSnapshotIntv = 60 * time.Second

	// registerErrGrace is the grace period where we don't log about
	// register errors after start. This is to improve the user experience
	// in dev mode where the leader isn't elected for a few seconds.
	registerErrGrace = 10 * time.Second

	// initialHeartbeatStagger is used to stagger the interval between
	// starting and the initial heartbeat. After the initial heartbeat,
	// we switch to using the TTL specified by the servers.
	initialHeartbeatStagger = 10 * time.Second
)
|
|
|
|
|
2015-08-20 22:25:09 +00:00
|
|
|
// DefaultConfig returns the default configuration
|
2015-08-25 23:21:29 +00:00
|
|
|
func DefaultConfig() *config.Config {
|
|
|
|
return &config.Config{
|
2015-08-20 22:25:09 +00:00
|
|
|
LogOutput: os.Stderr,
|
2015-09-14 01:18:40 +00:00
|
|
|
Region: "global",
|
2015-08-20 22:25:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Client is used to implement the client interaction with Nomad. Clients
// are expected to register as a schedulable node to the servers, and to
// run allocations as determined by the servers.
type Client struct {
	// config is the configuration the client was created with. init and
	// setupNode fill in missing values (StateDir, AllocDir, Node) in place.
	config *config.Config

	// start is the time the client was created; registerNode uses it to
	// suppress registration error logs during registerErrGrace.
	start time.Time

	// logger writes to config.LogOutput.
	logger *log.Logger

	// consulService is created by setupConsulService and handed to every
	// AllocRunner.
	consulService *ConsulService

	// lastServer and lastRPCTime remember the server used most recently so
	// that pickServer can reuse it for clientRPCCache. Both are guarded by
	// lastServerLock.
	lastServer     net.Addr
	lastRPCTime    time.Time
	lastServerLock sync.Mutex

	// servers is the list of known server addresses, guarded by serverLock.
	servers    []string
	serverLock sync.RWMutex

	// connPool is the connection pool used to issue RPCs to servers.
	connPool *nomad.ConnPool

	// lastHeartbeat and heartbeatTTL are refreshed after each successful
	// node registration or status update.
	lastHeartbeat time.Time
	heartbeatTTL  time.Duration

	// allocs is the current set of allocations, keyed by allocation ID
	// and guarded by allocLock.
	allocs    map[string]*AllocRunner
	allocLock sync.RWMutex

	// shutdown records that Shutdown already ran; shutdownCh is closed by
	// Shutdown to stop the background goroutines. shutdownLock serializes
	// calls to Shutdown.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
}
|
|
|
|
|
|
|
|
// NewClient is used to create a new client from the given configuration
|
2015-08-25 23:21:29 +00:00
|
|
|
func NewClient(cfg *config.Config) (*Client, error) {
|
2015-08-20 22:25:09 +00:00
|
|
|
// Create a logger
|
2015-08-25 23:21:29 +00:00
|
|
|
logger := log.New(cfg.LogOutput, "", log.LstdFlags)
|
2015-08-20 22:25:09 +00:00
|
|
|
|
2015-08-20 23:41:29 +00:00
|
|
|
// Create the client
|
2015-08-20 22:25:09 +00:00
|
|
|
c := &Client{
|
2015-11-25 21:39:16 +00:00
|
|
|
config: cfg,
|
|
|
|
start: time.Now(),
|
|
|
|
connPool: nomad.NewPool(cfg.LogOutput, clientRPCCache, clientMaxStreams, nil),
|
|
|
|
logger: logger,
|
|
|
|
allocs: make(map[string]*AllocRunner),
|
|
|
|
shutdownCh: make(chan struct{}),
|
|
|
|
}
|
|
|
|
|
|
|
|
// Setup the Consul Service
|
|
|
|
if err := c.setupConsulService(); err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create the consul service: %v", err)
|
2015-08-20 22:25:09 +00:00
|
|
|
}
|
2015-08-20 23:41:29 +00:00
|
|
|
|
2015-09-12 18:47:44 +00:00
|
|
|
// Initialize the client
|
|
|
|
if err := c.init(); err != nil {
|
|
|
|
return nil, fmt.Errorf("failed intializing client: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-08-20 23:41:29 +00:00
|
|
|
// Setup the node
|
|
|
|
if err := c.setupNode(); err != nil {
|
|
|
|
return nil, fmt.Errorf("node setup failed: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fingerprint the node
|
|
|
|
if err := c.fingerprint(); err != nil {
|
|
|
|
return nil, fmt.Errorf("fingerprinting failed: %v", err)
|
|
|
|
}
|
2015-08-20 23:53:43 +00:00
|
|
|
|
|
|
|
// Scan for drivers
|
|
|
|
if err := c.setupDrivers(); err != nil {
|
|
|
|
return nil, fmt.Errorf("driver setup failed: %v", err)
|
|
|
|
}
|
2015-08-21 00:49:04 +00:00
|
|
|
|
2015-09-25 01:51:17 +00:00
|
|
|
// Set up the known servers list
|
|
|
|
c.SetServers(c.config.Servers)
|
|
|
|
|
2015-11-09 23:55:31 +00:00
|
|
|
// Restore the state
|
|
|
|
if err := c.restoreState(); err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to restore state: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-08-21 00:49:04 +00:00
|
|
|
// Start the client!
|
|
|
|
go c.run()
|
2015-11-18 12:59:57 +00:00
|
|
|
|
2015-11-24 20:34:26 +00:00
|
|
|
// Start the consul service
|
|
|
|
go c.consulService.SyncWithConsul()
|
2015-08-20 22:25:09 +00:00
|
|
|
return c, nil
|
2015-08-23 23:53:15 +00:00
|
|
|
}
|
|
|
|
|
2015-11-25 21:39:16 +00:00
|
|
|
func (c *Client) setupConsulService() error {
|
|
|
|
var consulService *ConsulService
|
|
|
|
var err error
|
|
|
|
addr := c.config.ReadDefault("consul.address", "127.0.0.1:8500")
|
|
|
|
token := c.config.Read("consul.token")
|
|
|
|
auth := c.config.Read("consul.auth")
|
|
|
|
enableSSL := c.config.ReadBoolDefault("consul.ssl", false)
|
2015-11-29 01:33:34 +00:00
|
|
|
verifySSL := c.config.ReadBoolDefault("consul.verifyssl", true)
|
2015-12-11 19:02:23 +00:00
|
|
|
consulServiceCfg := &consulServiceConfig{
|
|
|
|
logger: c.logger,
|
|
|
|
consulAddr: addr,
|
|
|
|
token: token,
|
|
|
|
auth: auth,
|
|
|
|
enableSSL: enableSSL,
|
|
|
|
verifySSL: verifySSL,
|
|
|
|
node: c.config.Node,
|
|
|
|
}
|
|
|
|
if consulService, err = NewConsulService(consulServiceCfg); err != nil {
|
2015-11-25 21:39:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
c.consulService = consulService
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-09-12 18:47:44 +00:00
|
|
|
// init is used to initialize the client and perform any setup
|
|
|
|
// needed before we begin starting its various components.
|
|
|
|
func (c *Client) init() error {
|
2015-09-24 21:29:53 +00:00
|
|
|
// Ensure the state dir exists if we have one
|
|
|
|
if c.config.StateDir != "" {
|
|
|
|
if err := os.MkdirAll(c.config.StateDir, 0700); err != nil {
|
|
|
|
return fmt.Errorf("failed creating state dir: %s", err)
|
|
|
|
}
|
2015-09-25 17:04:08 +00:00
|
|
|
|
2015-11-11 00:03:18 +00:00
|
|
|
} else {
|
|
|
|
// Othewise make a temp directory to use.
|
|
|
|
p, err := ioutil.TempDir("", "NomadClient")
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed creating temporary directory for the StateDir: %v", err)
|
|
|
|
}
|
|
|
|
c.config.StateDir = p
|
2015-09-24 21:29:53 +00:00
|
|
|
}
|
2015-11-11 00:03:18 +00:00
|
|
|
c.logger.Printf("[INFO] client: using state directory %v", c.config.StateDir)
|
2015-09-24 21:29:53 +00:00
|
|
|
|
2015-09-13 19:14:12 +00:00
|
|
|
// Ensure the alloc dir exists if we have one
|
|
|
|
if c.config.AllocDir != "" {
|
|
|
|
if err := os.MkdirAll(c.config.AllocDir, 0700); err != nil {
|
|
|
|
return fmt.Errorf("failed creating alloc dir: %s", err)
|
|
|
|
}
|
2015-09-26 01:12:11 +00:00
|
|
|
} else {
|
|
|
|
// Othewise make a temp directory to use.
|
|
|
|
p, err := ioutil.TempDir("", "NomadClient")
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed creating temporary directory for the AllocDir: %v", err)
|
|
|
|
}
|
|
|
|
c.config.AllocDir = p
|
2015-09-12 18:47:44 +00:00
|
|
|
}
|
2015-09-23 05:00:24 +00:00
|
|
|
|
2015-09-25 23:46:21 +00:00
|
|
|
c.logger.Printf("[INFO] client: using alloc directory %v", c.config.AllocDir)
|
2015-09-12 18:47:44 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-23 23:53:15 +00:00
|
|
|
// Leave is used to prepare the client to leave the cluster.
// It is currently a stub: it performs no work and always returns nil.
func (c *Client) Leave() error {
	// TODO
	return nil
}
|
|
|
|
|
|
|
|
// Shutdown is used to tear down the client
|
|
|
|
func (c *Client) Shutdown() error {
|
2015-08-20 23:07:26 +00:00
|
|
|
c.logger.Printf("[INFO] client: shutting down")
|
2015-08-20 22:25:09 +00:00
|
|
|
c.shutdownLock.Lock()
|
|
|
|
defer c.shutdownLock.Unlock()
|
|
|
|
|
|
|
|
if c.shutdown {
|
|
|
|
return nil
|
|
|
|
}
|
2015-10-04 20:36:03 +00:00
|
|
|
|
|
|
|
// Destroy all the running allocations.
|
|
|
|
if c.config.DevMode {
|
|
|
|
for _, ar := range c.allocs {
|
|
|
|
ar.Destroy()
|
|
|
|
<-ar.WaitCh()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-11-24 20:34:26 +00:00
|
|
|
// Stop the consul service
|
|
|
|
c.consulService.ShutDown()
|
2015-11-18 12:59:57 +00:00
|
|
|
|
2015-08-20 22:25:09 +00:00
|
|
|
c.shutdown = true
|
|
|
|
close(c.shutdownCh)
|
2015-08-21 00:49:04 +00:00
|
|
|
c.connPool.Shutdown()
|
2015-08-30 01:16:49 +00:00
|
|
|
return c.saveState()
|
2015-08-20 22:25:09 +00:00
|
|
|
}
|
2015-08-20 23:07:26 +00:00
|
|
|
|
|
|
|
// RPC is used to forward an RPC call to a nomad server, or fail if no servers
|
|
|
|
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
|
|
|
|
// Invoke the RPCHandle if it exists
|
|
|
|
if c.config.RPCHandler != nil {
|
|
|
|
return c.config.RPCHandler.RPC(method, args, reply)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pick a server to request from
|
|
|
|
addr, err := c.pickServer()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the RPC request
|
|
|
|
err = c.connPool.RPC(c.config.Region, addr, 1, method, args, reply)
|
|
|
|
|
|
|
|
// Update the last server information
|
|
|
|
c.lastServerLock.Lock()
|
|
|
|
if err != nil {
|
|
|
|
c.lastServer = nil
|
|
|
|
c.lastRPCTime = time.Time{}
|
|
|
|
} else {
|
|
|
|
c.lastServer = addr
|
|
|
|
c.lastRPCTime = time.Now()
|
|
|
|
}
|
|
|
|
c.lastServerLock.Unlock()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// pickServer is used to pick a target RPC server
|
|
|
|
func (c *Client) pickServer() (net.Addr, error) {
|
|
|
|
c.lastServerLock.Lock()
|
|
|
|
defer c.lastServerLock.Unlock()
|
|
|
|
|
|
|
|
// Check for a valid last-used server
|
|
|
|
if c.lastServer != nil && time.Now().Sub(c.lastRPCTime) < clientRPCCache {
|
|
|
|
return c.lastServer, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bail if we can't find any servers
|
2015-09-25 01:51:17 +00:00
|
|
|
servers := c.Servers()
|
|
|
|
if len(servers) == 0 {
|
2015-08-20 23:07:26 +00:00
|
|
|
return nil, fmt.Errorf("no known servers")
|
|
|
|
}
|
|
|
|
|
2015-09-25 01:51:17 +00:00
|
|
|
// Shuffle so we don't always use the same server
|
2015-08-20 23:07:26 +00:00
|
|
|
shuffleStrings(servers)
|
|
|
|
|
|
|
|
// Try to resolve each server
|
|
|
|
for i := 0; i < len(servers); i++ {
|
|
|
|
addr, err := net.ResolveTCPAddr("tcp", servers[i])
|
|
|
|
if err == nil {
|
|
|
|
c.lastServer = addr
|
|
|
|
c.lastRPCTime = time.Now()
|
|
|
|
return addr, nil
|
|
|
|
}
|
2015-08-27 00:14:56 +00:00
|
|
|
c.logger.Printf("[WARN] client: failed to resolve '%s': %s", servers[i], err)
|
2015-08-20 23:07:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Bail if we reach this point
|
|
|
|
return nil, fmt.Errorf("failed to resolve any servers")
|
|
|
|
}
|
|
|
|
|
2015-09-25 01:51:17 +00:00
|
|
|
// Servers is used to return the current known servers list. When an agent
|
|
|
|
// is first started, this list comes directly from configuration files.
|
|
|
|
func (c *Client) Servers() []string {
|
|
|
|
c.serverLock.RLock()
|
|
|
|
defer c.serverLock.RUnlock()
|
|
|
|
return c.servers
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetServers is used to modify the known servers list. This avoids forcing
|
|
|
|
// a config rollout + rolling restart and enables auto-join features. The
|
|
|
|
// full set of servers is passed to support adding and/or removing servers.
|
|
|
|
func (c *Client) SetServers(servers []string) {
|
|
|
|
c.serverLock.Lock()
|
|
|
|
defer c.serverLock.Unlock()
|
|
|
|
if servers == nil {
|
|
|
|
servers = make([]string, 0)
|
|
|
|
}
|
|
|
|
c.servers = servers
|
|
|
|
}
|
|
|
|
|
2015-08-20 23:07:26 +00:00
|
|
|
// Stats is used to return statistics for debugging and insight
|
|
|
|
// for various sub-systems
|
|
|
|
func (c *Client) Stats() map[string]map[string]string {
|
|
|
|
toString := func(v uint64) string {
|
|
|
|
return strconv.FormatUint(v, 10)
|
|
|
|
}
|
2015-08-31 00:24:12 +00:00
|
|
|
c.allocLock.RLock()
|
|
|
|
numAllocs := len(c.allocs)
|
|
|
|
c.allocLock.RUnlock()
|
|
|
|
|
2015-08-20 23:07:26 +00:00
|
|
|
stats := map[string]map[string]string{
|
2015-08-31 00:24:12 +00:00
|
|
|
"client": map[string]string{
|
2015-09-25 01:51:17 +00:00
|
|
|
"known_servers": toString(uint64(len(c.Servers()))),
|
2015-08-31 00:24:12 +00:00
|
|
|
"num_allocations": toString(uint64(numAllocs)),
|
2015-09-22 22:29:30 +00:00
|
|
|
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
|
|
|
|
"heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL),
|
2015-08-20 23:07:26 +00:00
|
|
|
},
|
|
|
|
"runtime": nomad.RuntimeStats(),
|
|
|
|
}
|
|
|
|
return stats
|
|
|
|
}
|
2015-08-20 23:41:29 +00:00
|
|
|
|
|
|
|
// Node returns the locally registered node. This is the Node stored in the
// client configuration, not a copy.
func (c *Client) Node() *structs.Node {
	return c.config.Node
}
|
|
|
|
|
2015-08-23 21:12:26 +00:00
|
|
|
// restoreState is used to restore our state from the data dir
|
|
|
|
func (c *Client) restoreState() error {
|
|
|
|
if c.config.DevMode {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-30 01:16:49 +00:00
|
|
|
// Scan the directory
|
|
|
|
list, err := ioutil.ReadDir(filepath.Join(c.config.StateDir, "alloc"))
|
2015-09-23 05:00:24 +00:00
|
|
|
if err != nil && os.IsNotExist(err) {
|
|
|
|
return nil
|
|
|
|
} else if err != nil {
|
2015-08-30 01:16:49 +00:00
|
|
|
return fmt.Errorf("failed to list alloc state: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load each alloc back
|
|
|
|
var mErr multierror.Error
|
|
|
|
for _, entry := range list {
|
|
|
|
id := entry.Name()
|
|
|
|
alloc := &structs.Allocation{ID: id}
|
2015-11-24 20:34:26 +00:00
|
|
|
ar := NewAllocRunner(c.logger, c.config, c.updateAllocStatus, alloc, c.consulService)
|
2015-08-30 01:16:49 +00:00
|
|
|
c.allocs[id] = ar
|
|
|
|
if err := ar.RestoreState(); err != nil {
|
2015-11-09 23:55:31 +00:00
|
|
|
c.logger.Printf("[ERR] client: failed to restore state for alloc %s: %v", id, err)
|
2015-08-30 01:16:49 +00:00
|
|
|
mErr.Errors = append(mErr.Errors, err)
|
|
|
|
} else {
|
|
|
|
go ar.Run()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return mErr.ErrorOrNil()
|
2015-08-23 21:12:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// saveState is used to snapshot our state into the data dir
|
|
|
|
func (c *Client) saveState() error {
|
|
|
|
if c.config.DevMode {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-30 01:16:49 +00:00
|
|
|
var mErr multierror.Error
|
|
|
|
c.allocLock.RLock()
|
|
|
|
defer c.allocLock.RUnlock()
|
|
|
|
for id, ar := range c.allocs {
|
|
|
|
if err := ar.SaveState(); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to save state for alloc %s: %v",
|
|
|
|
id, err)
|
|
|
|
mErr.Errors = append(mErr.Errors, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return mErr.ErrorOrNil()
|
2015-08-23 21:12:26 +00:00
|
|
|
}
|
|
|
|
|
2015-09-22 17:31:47 +00:00
|
|
|
// nodeID restores a persistent unique ID or generates a new one
|
|
|
|
func (c *Client) nodeID() (string, error) {
|
|
|
|
// Do not persist in dev mode
|
|
|
|
if c.config.DevMode {
|
|
|
|
return structs.GenerateUUID(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attempt to read existing ID
|
|
|
|
path := filepath.Join(c.config.StateDir, "client-id")
|
|
|
|
buf, err := ioutil.ReadFile(path)
|
|
|
|
if err != nil && !os.IsNotExist(err) {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use existing ID if any
|
|
|
|
if len(buf) != 0 {
|
|
|
|
return string(buf), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate new ID
|
|
|
|
id := structs.GenerateUUID()
|
|
|
|
|
|
|
|
// Persist the ID
|
|
|
|
if err := ioutil.WriteFile(path, []byte(id), 0700); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return id, nil
|
|
|
|
}
|
|
|
|
|
2015-08-20 23:41:29 +00:00
|
|
|
// setupNode is used to setup the initial node
|
|
|
|
func (c *Client) setupNode() error {
|
|
|
|
node := c.config.Node
|
|
|
|
if node == nil {
|
|
|
|
node = &structs.Node{}
|
|
|
|
c.config.Node = node
|
|
|
|
}
|
|
|
|
if node.Attributes == nil {
|
|
|
|
node.Attributes = make(map[string]string)
|
|
|
|
}
|
|
|
|
if node.Links == nil {
|
|
|
|
node.Links = make(map[string]string)
|
|
|
|
}
|
|
|
|
if node.Meta == nil {
|
|
|
|
node.Meta = make(map[string]string)
|
|
|
|
}
|
2015-08-21 00:49:04 +00:00
|
|
|
if node.Resources == nil {
|
|
|
|
node.Resources = &structs.Resources{}
|
|
|
|
}
|
|
|
|
if node.ID == "" {
|
2015-09-22 17:31:47 +00:00
|
|
|
id, err := c.nodeID()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("node ID setup failed: %v", err)
|
|
|
|
}
|
|
|
|
node.ID = id
|
2015-08-21 00:49:04 +00:00
|
|
|
}
|
|
|
|
if node.Datacenter == "" {
|
|
|
|
node.Datacenter = "dc1"
|
|
|
|
}
|
|
|
|
if node.Name == "" {
|
|
|
|
node.Name, _ = os.Hostname()
|
|
|
|
}
|
|
|
|
if node.Name == "" {
|
|
|
|
node.Name = node.ID
|
|
|
|
}
|
|
|
|
node.Status = structs.NodeStatusInit
|
2015-08-20 23:41:29 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// fingerprint is used to fingerprint the client and setup the node
|
|
|
|
func (c *Client) fingerprint() error {
|
2015-11-24 15:18:49 +00:00
|
|
|
whitelist := c.config.ReadStringListToMap("fingerprint.whitelist")
|
|
|
|
whitelistEnabled := len(whitelist) > 0
|
|
|
|
|
2015-08-20 23:41:29 +00:00
|
|
|
var applied []string
|
2015-11-24 15:18:49 +00:00
|
|
|
var skipped []string
|
2015-09-23 04:25:12 +00:00
|
|
|
for _, name := range fingerprint.BuiltinFingerprints {
|
2015-11-24 15:18:49 +00:00
|
|
|
// Skip modules that are not in the whitelist if it is enabled.
|
|
|
|
if _, ok := whitelist[name]; whitelistEnabled && !ok {
|
|
|
|
skipped = append(skipped, name)
|
|
|
|
continue
|
|
|
|
}
|
2015-08-20 23:41:29 +00:00
|
|
|
f, err := fingerprint.NewFingerprint(name, c.logger)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-25 23:21:29 +00:00
|
|
|
applies, err := f.Fingerprint(c.config, c.config.Node)
|
2015-08-20 23:41:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if applies {
|
|
|
|
applied = append(applied, name)
|
|
|
|
}
|
2015-11-05 21:41:41 +00:00
|
|
|
p, period := f.Periodic()
|
|
|
|
if p {
|
|
|
|
// TODO: If more periodic fingerprinters are added, then
|
|
|
|
// fingerprintPeriodic should be used to handle all the periodic
|
|
|
|
// fingerprinters by using a priority queue.
|
|
|
|
go c.fingerprintPeriodic(name, f, period)
|
|
|
|
}
|
2015-08-20 23:41:29 +00:00
|
|
|
}
|
|
|
|
c.logger.Printf("[DEBUG] client: applied fingerprints %v", applied)
|
2015-11-24 15:18:49 +00:00
|
|
|
if len(skipped) != 0 {
|
|
|
|
c.logger.Printf("[DEBUG] client: fingerprint modules skipped due to whitelist: %v", skipped)
|
|
|
|
}
|
2015-08-20 23:41:29 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-08-20 23:53:43 +00:00
|
|
|
|
2015-11-06 02:47:16 +00:00
|
|
|
// fingerprintPeriodic runs a fingerprinter at the specified duration.
|
2015-11-05 21:41:41 +00:00
|
|
|
func (c *Client) fingerprintPeriodic(name string, f fingerprint.Fingerprint, d time.Duration) {
|
|
|
|
c.logger.Printf("[DEBUG] client: periodically fingerprinting %v at duration %v", name, d)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-time.After(d):
|
|
|
|
if _, err := f.Fingerprint(c.config, c.config.Node); err != nil {
|
2015-11-06 02:47:16 +00:00
|
|
|
c.logger.Printf("[DEBUG] client: periodic fingerprinting for %v failed: %v", name, err)
|
2015-11-05 21:41:41 +00:00
|
|
|
}
|
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-20 23:53:43 +00:00
|
|
|
// setupDrivers is used to find the available drivers
|
|
|
|
func (c *Client) setupDrivers() error {
|
2015-11-20 00:39:21 +00:00
|
|
|
// Build the whitelist of drivers.
|
2015-11-24 15:18:49 +00:00
|
|
|
whitelist := c.config.ReadStringListToMap("driver.whitelist")
|
2015-11-20 00:39:21 +00:00
|
|
|
whitelistEnabled := len(whitelist) > 0
|
|
|
|
|
2015-08-20 23:53:43 +00:00
|
|
|
var avail []string
|
2015-11-20 22:07:35 +00:00
|
|
|
var skipped []string
|
2016-01-11 17:58:26 +00:00
|
|
|
driverCtx := driver.NewDriverContext("", c.config, c.config.Node, c.logger, nil)
|
2015-08-20 23:53:43 +00:00
|
|
|
for name := range driver.BuiltinDrivers {
|
2015-11-20 00:39:21 +00:00
|
|
|
// Skip fingerprinting drivers that are not in the whitelist if it is
|
|
|
|
// enabled.
|
|
|
|
if _, ok := whitelist[name]; whitelistEnabled && !ok {
|
2015-11-20 22:07:35 +00:00
|
|
|
skipped = append(skipped, name)
|
2015-11-20 00:39:21 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-09-10 01:06:23 +00:00
|
|
|
d, err := driver.NewDriver(name, driverCtx)
|
2015-08-20 23:53:43 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-25 23:21:29 +00:00
|
|
|
applies, err := d.Fingerprint(c.config, c.config.Node)
|
2015-08-20 23:53:43 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if applies {
|
|
|
|
avail = append(avail, name)
|
|
|
|
}
|
|
|
|
}
|
2015-11-20 00:39:21 +00:00
|
|
|
|
2015-08-20 23:53:43 +00:00
|
|
|
c.logger.Printf("[DEBUG] client: available drivers %v", avail)
|
2015-11-20 00:39:21 +00:00
|
|
|
|
2015-11-20 22:07:35 +00:00
|
|
|
if len(skipped) != 0 {
|
|
|
|
c.logger.Printf("[DEBUG] client: drivers skipped due to whitelist: %v", skipped)
|
2015-11-20 00:39:21 +00:00
|
|
|
}
|
|
|
|
|
2015-08-20 23:53:43 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-08-21 00:49:04 +00:00
|
|
|
|
2015-08-24 00:40:14 +00:00
|
|
|
// retryIntv calculates a retry interval value given the base
|
|
|
|
func (c *Client) retryIntv(base time.Duration) time.Duration {
|
|
|
|
if c.config.DevMode {
|
|
|
|
return devModeRetryIntv
|
|
|
|
}
|
|
|
|
return base + randomStagger(base)
|
|
|
|
}
|
|
|
|
|
2015-08-21 00:49:04 +00:00
|
|
|
// run is a long lived goroutine used to run the client
|
|
|
|
func (c *Client) run() {
|
|
|
|
// Register the client
|
|
|
|
for {
|
|
|
|
if err := c.registerNode(); err == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
select {
|
2015-08-24 00:40:14 +00:00
|
|
|
case <-time.After(c.retryIntv(registerRetryIntv)):
|
2015-08-21 00:49:04 +00:00
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-21 00:02:12 +00:00
|
|
|
// Setup the heartbeat timer, for the initial registration
|
|
|
|
// we want to do this quickly. We want to do it extra quickly
|
|
|
|
// in development mode.
|
|
|
|
var heartbeat <-chan time.Time
|
|
|
|
if c.config.DevMode {
|
|
|
|
heartbeat = time.After(0)
|
|
|
|
} else {
|
|
|
|
heartbeat = time.After(randomStagger(initialHeartbeatStagger))
|
|
|
|
}
|
2015-08-23 01:16:05 +00:00
|
|
|
|
2015-08-23 02:31:22 +00:00
|
|
|
// Watch for changes in allocations
|
|
|
|
allocUpdates := make(chan []*structs.Allocation, 1)
|
|
|
|
go c.watchAllocations(allocUpdates)
|
2015-08-21 00:49:04 +00:00
|
|
|
|
2015-08-31 00:19:20 +00:00
|
|
|
// Create a snapshot timer
|
|
|
|
snapshot := time.After(stateSnapshotIntv)
|
|
|
|
|
2015-08-23 01:16:05 +00:00
|
|
|
// Periodically update our status and wait for termination
|
2015-08-24 00:40:14 +00:00
|
|
|
for {
|
|
|
|
select {
|
2015-08-31 00:19:20 +00:00
|
|
|
case <-snapshot:
|
|
|
|
snapshot = time.After(stateSnapshotIntv)
|
|
|
|
if err := c.saveState(); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to save state: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-08-24 00:40:14 +00:00
|
|
|
case allocs := <-allocUpdates:
|
|
|
|
c.runAllocs(allocs)
|
|
|
|
|
|
|
|
case <-heartbeat:
|
|
|
|
if err := c.updateNodeStatus(); err != nil {
|
|
|
|
heartbeat = time.After(c.retryIntv(registerRetryIntv))
|
|
|
|
} else {
|
|
|
|
heartbeat = time.After(c.heartbeatTTL)
|
|
|
|
}
|
2015-08-23 02:31:22 +00:00
|
|
|
|
2015-08-24 00:40:14 +00:00
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
2015-08-21 00:49:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// registerNode is used to register the node or update the registration
|
|
|
|
func (c *Client) registerNode() error {
|
|
|
|
node := c.Node()
|
|
|
|
req := structs.NodeRegisterRequest{
|
|
|
|
Node: node,
|
|
|
|
WriteRequest: structs.WriteRequest{Region: c.config.Region},
|
|
|
|
}
|
|
|
|
var resp structs.NodeUpdateResponse
|
2015-09-07 03:31:32 +00:00
|
|
|
err := c.RPC("Node.Register", &req, &resp)
|
2015-08-21 00:49:04 +00:00
|
|
|
if err != nil {
|
2015-09-07 03:18:47 +00:00
|
|
|
if time.Since(c.start) > registerErrGrace {
|
|
|
|
c.logger.Printf("[ERR] client: failed to register node: %v", err)
|
|
|
|
}
|
2015-08-21 00:49:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
c.logger.Printf("[DEBUG] client: node registration complete")
|
|
|
|
if len(resp.EvalIDs) != 0 {
|
|
|
|
c.logger.Printf("[DEBUG] client: %d evaluations triggered by node registration", len(resp.EvalIDs))
|
|
|
|
}
|
2015-08-23 01:16:05 +00:00
|
|
|
c.lastHeartbeat = time.Now()
|
|
|
|
c.heartbeatTTL = resp.HeartbeatTTL
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateNodeStatus is used to heartbeat and update the status of the node
|
|
|
|
func (c *Client) updateNodeStatus() error {
|
|
|
|
node := c.Node()
|
|
|
|
req := structs.NodeUpdateStatusRequest{
|
|
|
|
NodeID: node.ID,
|
|
|
|
Status: structs.NodeStatusReady,
|
|
|
|
WriteRequest: structs.WriteRequest{Region: c.config.Region},
|
|
|
|
}
|
|
|
|
var resp structs.NodeUpdateResponse
|
2015-09-07 03:31:32 +00:00
|
|
|
err := c.RPC("Node.UpdateStatus", &req, &resp)
|
2015-08-23 01:16:05 +00:00
|
|
|
if err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to update status: %v", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if len(resp.EvalIDs) != 0 {
|
|
|
|
c.logger.Printf("[DEBUG] client: %d evaluations triggered by node update", len(resp.EvalIDs))
|
|
|
|
}
|
|
|
|
if resp.Index != 0 {
|
2015-08-24 00:40:14 +00:00
|
|
|
c.logger.Printf("[DEBUG] client: state updated to %s", req.Status)
|
2015-08-23 01:16:05 +00:00
|
|
|
}
|
|
|
|
c.lastHeartbeat = time.Now()
|
|
|
|
c.heartbeatTTL = resp.HeartbeatTTL
|
2015-08-21 00:49:04 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-08-23 02:31:22 +00:00
|
|
|
|
2015-08-29 21:22:24 +00:00
|
|
|
// updateAllocStatus is used to update the status of an allocation
|
|
|
|
func (c *Client) updateAllocStatus(alloc *structs.Allocation) error {
|
|
|
|
args := structs.AllocUpdateRequest{
|
|
|
|
Alloc: []*structs.Allocation{alloc},
|
|
|
|
WriteRequest: structs.WriteRequest{Region: c.config.Region},
|
|
|
|
}
|
|
|
|
var resp structs.GenericResponse
|
2015-09-07 03:31:32 +00:00
|
|
|
err := c.RPC("Node.UpdateAlloc", &args, &resp)
|
2015-08-29 21:22:24 +00:00
|
|
|
if err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to update allocation: %v", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-23 02:31:22 +00:00
|
|
|
// watchAllocations is used to scan for updates to allocations
|
|
|
|
func (c *Client) watchAllocations(allocUpdates chan []*structs.Allocation) {
|
|
|
|
req := structs.NodeSpecificRequest{
|
|
|
|
NodeID: c.Node().ID,
|
|
|
|
QueryOptions: structs.QueryOptions{
|
2015-08-24 00:40:14 +00:00
|
|
|
Region: c.config.Region,
|
|
|
|
AllowStale: true,
|
2015-08-23 02:31:22 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
var resp structs.NodeAllocsResponse
|
|
|
|
|
|
|
|
for {
|
|
|
|
// Get the allocations, blocking for updates
|
2015-09-23 05:10:28 +00:00
|
|
|
resp = structs.NodeAllocsResponse{}
|
2015-09-07 03:31:32 +00:00
|
|
|
err := c.RPC("Node.GetAllocs", &req, &resp)
|
2015-08-23 02:31:22 +00:00
|
|
|
if err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to query for node allocations: %v", err)
|
2015-08-24 00:40:14 +00:00
|
|
|
retry := c.retryIntv(getAllocRetryIntv)
|
2015-08-23 02:31:22 +00:00
|
|
|
select {
|
|
|
|
case <-time.After(retry):
|
|
|
|
continue
|
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for shutdown
|
|
|
|
select {
|
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for updates
|
2015-08-24 00:40:14 +00:00
|
|
|
if resp.Index <= req.MinQueryIndex {
|
2015-08-23 02:31:22 +00:00
|
|
|
continue
|
|
|
|
}
|
2015-08-24 00:40:14 +00:00
|
|
|
req.MinQueryIndex = resp.Index
|
|
|
|
c.logger.Printf("[DEBUG] client: updated allocations at index %d (%d allocs)", resp.Index, len(resp.Allocs))
|
2015-08-23 02:31:22 +00:00
|
|
|
|
|
|
|
// Push the updates
|
|
|
|
select {
|
|
|
|
case allocUpdates <- resp.Allocs:
|
|
|
|
case <-c.shutdownCh:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// runAllocs is invoked when we get an updated set of allocations
|
2015-08-23 21:54:52 +00:00
|
|
|
func (c *Client) runAllocs(updated []*structs.Allocation) {
|
|
|
|
// Get the existing allocs
|
2015-08-23 22:06:47 +00:00
|
|
|
c.allocLock.RLock()
|
2015-08-29 21:33:30 +00:00
|
|
|
exist := make([]*structs.Allocation, 0, len(c.allocs))
|
2015-08-30 01:16:49 +00:00
|
|
|
for _, ar := range c.allocs {
|
|
|
|
exist = append(exist, ar.Alloc())
|
2015-08-23 21:54:52 +00:00
|
|
|
}
|
2015-08-23 22:06:47 +00:00
|
|
|
c.allocLock.RUnlock()
|
2015-08-23 21:54:52 +00:00
|
|
|
|
|
|
|
// Diff the existing and updated allocations
|
|
|
|
diff := diffAllocs(exist, updated)
|
|
|
|
c.logger.Printf("[DEBUG] client: %#v", diff)
|
|
|
|
|
|
|
|
// Remove the old allocations
|
|
|
|
for _, remove := range diff.removed {
|
|
|
|
if err := c.removeAlloc(remove); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to remove alloc '%s': %v",
|
|
|
|
remove.ID, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the existing allocations
|
|
|
|
for _, update := range diff.updated {
|
|
|
|
if err := c.updateAlloc(update.exist, update.updated); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to update alloc '%s': %v",
|
|
|
|
update.exist.ID, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Start the new allocations
|
|
|
|
for _, add := range diff.added {
|
|
|
|
if err := c.addAlloc(add); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to add alloc '%s': %v",
|
|
|
|
add.ID, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Persist our state
|
|
|
|
if err := c.saveState(); err != nil {
|
|
|
|
c.logger.Printf("[ERR] client: failed to save state: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// removeAlloc is invoked when we should remove an allocation
|
|
|
|
func (c *Client) removeAlloc(alloc *structs.Allocation) error {
|
2015-08-29 21:33:30 +00:00
|
|
|
c.allocLock.Lock()
|
|
|
|
defer c.allocLock.Unlock()
|
2015-08-30 01:16:49 +00:00
|
|
|
ar, ok := c.allocs[alloc.ID]
|
2015-08-23 22:06:47 +00:00
|
|
|
if !ok {
|
|
|
|
c.logger.Printf("[WARN] client: missing context for alloc '%s'", alloc.ID)
|
|
|
|
return nil
|
|
|
|
}
|
2015-08-30 01:16:49 +00:00
|
|
|
ar.Destroy()
|
2015-08-29 21:33:30 +00:00
|
|
|
delete(c.allocs, alloc.ID)
|
2015-08-23 21:54:52 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateAlloc is invoked when we should update an allocation
|
|
|
|
func (c *Client) updateAlloc(exist, update *structs.Allocation) error {
|
2015-08-23 22:06:47 +00:00
|
|
|
c.allocLock.RLock()
|
|
|
|
defer c.allocLock.RUnlock()
|
2015-08-30 01:16:49 +00:00
|
|
|
ar, ok := c.allocs[exist.ID]
|
2015-08-23 22:06:47 +00:00
|
|
|
if !ok {
|
|
|
|
c.logger.Printf("[WARN] client: missing context for alloc '%s'", exist.ID)
|
|
|
|
return nil
|
|
|
|
}
|
2015-08-30 01:16:49 +00:00
|
|
|
ar.Update(update)
|
2015-08-23 21:54:52 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// addAlloc is invoked when we should add an allocation
|
|
|
|
func (c *Client) addAlloc(alloc *structs.Allocation) error {
|
2015-08-23 22:06:47 +00:00
|
|
|
c.allocLock.Lock()
|
|
|
|
defer c.allocLock.Unlock()
|
2015-11-24 20:34:26 +00:00
|
|
|
ar := NewAllocRunner(c.logger, c.config, c.updateAllocStatus, alloc, c.consulService)
|
2015-08-30 01:16:49 +00:00
|
|
|
c.allocs[alloc.ID] = ar
|
|
|
|
go ar.Run()
|
2015-08-23 21:54:52 +00:00
|
|
|
return nil
|
2015-08-23 02:31:22 +00:00
|
|
|
}
|