// Source: open-nomad/command/agent/retry_join.go

package agent
import (
2018-05-22 18:14:41 +00:00
"fmt"
2018-04-18 16:18:18 +00:00
"log"
2018-04-18 22:09:46 +00:00
"strings"
2018-04-18 16:18:18 +00:00
"time"
)
2018-04-18 22:25:11 +00:00
// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows for ease of testing: the retry joiner
// only depends on this method set, so a mock can be substituted.
type DiscoverInterface interface {
	// Addrs discovers ip addresses of nodes that match the given filter
	// criteria.
	//
	// The config string must have the format 'provider=xxx key=val key=val ...'
	// where the keys and values are provider specific. The values are URL
	// encoded. The logger is handed to the implementation for its own output.
	Addrs(cfg string, logger *log.Logger) (addrs []string, err error)

	// Help describes the format of the configuration string for address
	// discovery and the various provider specific options.
	Help() string

	// Names returns the names of the configured providers.
	Names() []string
}
2018-04-18 22:25:11 +00:00
// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
2018-04-18 16:18:18 +00:00
type retryJoiner struct {
// serverJoin adds the specified servers to the serf cluster
serverJoin func([]string) (int, error)
// serverEnabled indicates whether the nomad agent will run in server mode
serverEnabled bool
// clientJoin adds the specified servers to the serf cluster
clientJoin func([]string) (int, error)
// clientEnabled indicates whether the nomad agent will run in client mode
clientEnabled bool
2018-04-18 16:18:18 +00:00
2018-04-18 22:25:11 +00:00
// discover is of type Discover, where this is either the go-discover
// implementation or a mock used for testing
2018-04-18 16:18:18 +00:00
discover DiscoverInterface
2018-04-18 22:25:11 +00:00
// errCh is used to communicate with the agent when the max retry attempt
// limit has been reached
2018-04-18 16:18:18 +00:00
errCh chan struct{}
2018-04-18 22:25:11 +00:00
// logger is the agent logger.
2018-04-18 16:18:18 +00:00
logger *log.Logger
}
2018-05-22 18:14:41 +00:00
// Validate ensures that the configuration passes validity checks for the
// retry_join stanza. If the configuration is not valid, returns an error that
// will be displayed to the operator, otherwise nil.
func (r *retryJoiner) Validate(config *Config) error {
// If retry_join is defined for the server, ensure that deprecated
// fields and the server_join stanza are not both set
if config.Server != nil && config.Server.ServerJoin != nil {
if len(config.Server.RetryJoin) != 0 {
return fmt.Errorf("server_join and retry_join cannot both be defined; try defining only server_join")
}
if len(config.Server.StartJoin) != 0 {
return fmt.Errorf("server_join and start_join cannot both be defined; try defining only server_join")
}
if config.Server.RetryMaxAttempts != 0 {
return fmt.Errorf("server_join and retry_max cannot both be defined; try defining only server_join")
}
if config.Server.RetryInterval != "0" && config.Server.RetryInterval != "" {
// 30s is the default value that is set, ignore if this is the case
if config.Server.RetryInterval != "30s" {
return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter")
}
}
if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 {
return fmt.Errorf("server_join and start_join cannot both be defined in the same stanza")
2018-05-22 18:14:41 +00:00
}
}
// if retry_join is defined for the client, ensure that start_join is not
// set as this configuration is only defined for servers.
if config.Client != nil && config.Client.ServerJoin != nil {
if config.Client.ServerJoin.StartJoin != nil {
return fmt.Errorf("start_join is not supported for Nomad clients")
2018-05-22 18:14:41 +00:00
}
}
return nil
}
2018-04-18 22:25:11 +00:00
// retryJoin is used to handle retrying a join until it succeeds or all retries
// are exhausted.
func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
if len(serverJoin.RetryJoin) == 0 {
2018-04-18 16:18:18 +00:00
return
}
2018-04-18 22:25:11 +00:00
attempt := 0
addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
r.logger.Printf("[INFO] agent: Joining cluster... %s", addrsToJoin)
2018-04-18 16:18:18 +00:00
for {
2018-04-18 22:09:46 +00:00
var addrs []string
2018-05-07 15:02:33 +00:00
var err error
2018-04-18 22:09:46 +00:00
for _, addr := range serverJoin.RetryJoin {
2018-04-18 22:09:46 +00:00
switch {
case strings.HasPrefix(addr, "provider="):
2018-04-18 22:09:46 +00:00
servers, err := r.discover.Addrs(addr, r.logger)
if err != nil {
r.logger.Printf("[ERR] agent: Join error %s", err)
} else {
addrs = append(addrs, servers...)
}
default:
addrs = append(addrs, addr)
}
}
2018-04-18 16:18:18 +00:00
2018-05-07 15:02:33 +00:00
if len(addrs) > 0 {
if r.serverEnabled && r.serverJoin != nil {
n, err := r.serverJoin(addrs)
if err == nil {
r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n)
return
}
}
if r.clientEnabled && r.clientJoin != nil {
n, err := r.clientJoin(addrs)
if err == nil {
r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n)
return
}
2018-05-07 15:02:33 +00:00
}
}
2018-04-18 22:25:11 +00:00
attempt++
if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts {
2018-04-18 16:18:18 +00:00
r.logger.Printf("[ERR] agent: max join retry exhausted, exiting")
close(r.errCh)
return
}
2018-05-07 20:02:51 +00:00
if err != nil {
r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
serverJoin.RetryInterval)
2018-05-07 20:02:51 +00:00
}
time.Sleep(serverJoin.RetryInterval)
2018-04-18 16:18:18 +00:00
}
}