2023-04-10 15:36:59 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2018-04-18 16:18:18 +00:00
|
|
|
package agent
|
|
|
|
|
|
|
|
import (
|
2018-05-22 18:14:41 +00:00
|
|
|
"fmt"
|
2023-01-30 14:48:43 +00:00
|
|
|
golog "log"
|
2018-04-18 22:09:46 +00:00
|
|
|
"strings"
|
2018-04-18 16:18:18 +00:00
|
|
|
"time"
|
2018-09-13 17:43:40 +00:00
|
|
|
|
|
|
|
log "github.com/hashicorp/go-hclog"
|
2018-04-18 16:18:18 +00:00
|
|
|
)
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
// DiscoverInterface abstracts the Discover type of the go-discover library.
// Depending on the interface rather than the concrete type lets tests swap in
// their own implementation.
type DiscoverInterface interface {
	// Addrs discovers the IP addresses of the nodes matching the filter
	// criteria described by cfg.
	//
	// cfg must be formatted as 'provider=xxx key=val key=val ...'. The keys
	// and values are specific to the chosen provider, and the values are URL
	// encoded.
	Addrs(cfg string, logger *golog.Logger) ([]string, error)

	// Help returns a description of the configuration string format used for
	// address discovery, together with the provider specific options.
	Help() string

	// Names returns the names of the providers that are configured.
	Names() []string
}
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
// retryJoiner is used to handle retrying a join until it succeeds or all of
|
|
|
|
// its tries are exhausted.
|
2018-04-18 16:18:18 +00:00
|
|
|
type retryJoiner struct {
|
2018-05-11 19:52:05 +00:00
|
|
|
// serverJoin adds the specified servers to the serf cluster
|
|
|
|
serverJoin func([]string) (int, error)
|
|
|
|
|
|
|
|
// serverEnabled indicates whether the nomad agent will run in server mode
|
|
|
|
serverEnabled bool
|
|
|
|
|
|
|
|
// clientJoin adds the specified servers to the serf cluster
|
|
|
|
clientJoin func([]string) (int, error)
|
|
|
|
|
|
|
|
// clientEnabled indicates whether the nomad agent will run in client mode
|
|
|
|
clientEnabled bool
|
2018-04-18 16:18:18 +00:00
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
// discover is of type Discover, where this is either the go-discover
|
|
|
|
// implementation or a mock used for testing
|
2018-04-18 16:18:18 +00:00
|
|
|
discover DiscoverInterface
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
// errCh is used to communicate with the agent when the max retry attempt
|
|
|
|
// limit has been reached
|
2018-04-18 16:18:18 +00:00
|
|
|
errCh chan struct{}
|
|
|
|
|
2018-09-13 17:43:40 +00:00
|
|
|
// logger is the retry joiners logger
|
|
|
|
logger log.Logger
|
2018-04-18 16:18:18 +00:00
|
|
|
}
|
|
|
|
|
2018-05-22 18:14:41 +00:00
|
|
|
// Validate ensures that the configuration passes validity checks for the
|
2023-01-30 14:48:43 +00:00
|
|
|
// retry_join block. If the configuration is not valid, returns an error that
|
2018-05-22 18:14:41 +00:00
|
|
|
// will be displayed to the operator, otherwise nil.
|
|
|
|
func (r *retryJoiner) Validate(config *Config) error {
|
|
|
|
|
|
|
|
// If retry_join is defined for the server, ensure that deprecated
|
2023-01-30 14:48:43 +00:00
|
|
|
// fields and the server_join block are not both set
|
2018-05-30 23:51:55 +00:00
|
|
|
if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
|
2018-05-22 18:14:41 +00:00
|
|
|
if len(config.Server.RetryJoin) != 0 {
|
2023-01-30 14:48:43 +00:00
|
|
|
return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join block")
|
2018-05-22 18:14:41 +00:00
|
|
|
}
|
|
|
|
if len(config.Server.StartJoin) != 0 {
|
2023-01-30 14:48:43 +00:00
|
|
|
return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join block")
|
2018-05-22 18:14:41 +00:00
|
|
|
}
|
|
|
|
if config.Server.RetryMaxAttempts != 0 {
|
2023-01-30 14:48:43 +00:00
|
|
|
return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join block")
|
2018-05-22 18:14:41 +00:00
|
|
|
}
|
2018-05-30 23:51:55 +00:00
|
|
|
|
|
|
|
if config.Server.RetryInterval != 0 {
|
2023-01-30 14:48:43 +00:00
|
|
|
return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join block")
|
2018-05-29 15:33:53 +00:00
|
|
|
}
|
|
|
|
|
2018-05-30 23:51:55 +00:00
|
|
|
if len(config.Server.ServerJoin.StartJoin) != 0 {
|
2018-05-30 20:49:36 +00:00
|
|
|
return fmt.Errorf("retry_join and start_join cannot both be defined")
|
2018-05-22 18:14:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if retry_join is defined for the client, ensure that start_join is not
|
|
|
|
// set as this configuration is only defined for servers.
|
|
|
|
if config.Client != nil && config.Client.ServerJoin != nil {
|
|
|
|
if config.Client.ServerJoin.StartJoin != nil {
|
2018-05-22 20:54:05 +00:00
|
|
|
return fmt.Errorf("start_join is not supported for Nomad clients")
|
2018-05-22 18:14:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
// retryJoin is used to handle retrying a join until it succeeds or all retries
|
|
|
|
// are exhausted.
|
2018-05-11 19:52:05 +00:00
|
|
|
func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
|
|
|
|
if len(serverJoin.RetryJoin) == 0 {
|
2018-04-18 16:18:18 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
attempt := 0
|
|
|
|
|
2018-05-11 19:52:05 +00:00
|
|
|
addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Info("starting retry join", "servers", addrsToJoin)
|
2018-04-18 16:18:18 +00:00
|
|
|
|
2018-09-13 17:43:40 +00:00
|
|
|
standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true})
|
2018-04-18 16:18:18 +00:00
|
|
|
for {
|
2018-04-18 22:09:46 +00:00
|
|
|
var addrs []string
|
2018-05-30 23:51:55 +00:00
|
|
|
var n int
|
2018-05-07 15:02:33 +00:00
|
|
|
var err error
|
2018-04-18 22:09:46 +00:00
|
|
|
|
2018-05-11 19:52:05 +00:00
|
|
|
for _, addr := range serverJoin.RetryJoin {
|
2018-04-18 22:09:46 +00:00
|
|
|
switch {
|
2018-05-04 20:23:36 +00:00
|
|
|
case strings.HasPrefix(addr, "provider="):
|
2018-09-13 17:43:40 +00:00
|
|
|
servers, err := r.discover.Addrs(addr, standardLogger)
|
2018-04-18 22:09:46 +00:00
|
|
|
if err != nil {
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Error("determining join addresses failed", "error", err)
|
2018-04-18 22:09:46 +00:00
|
|
|
} else {
|
|
|
|
addrs = append(addrs, servers...)
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
addrs = append(addrs, addr)
|
|
|
|
}
|
|
|
|
}
|
2018-04-18 16:18:18 +00:00
|
|
|
|
2018-05-07 15:02:33 +00:00
|
|
|
if len(addrs) > 0 {
|
2018-05-11 19:52:05 +00:00
|
|
|
if r.serverEnabled && r.serverJoin != nil {
|
2018-05-30 23:51:55 +00:00
|
|
|
n, err = r.serverJoin(addrs)
|
2018-05-11 19:52:05 +00:00
|
|
|
if err == nil {
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "server")
|
2018-05-11 19:52:05 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if r.clientEnabled && r.clientJoin != nil {
|
2018-05-30 23:51:55 +00:00
|
|
|
n, err = r.clientJoin(addrs)
|
2018-05-11 19:52:05 +00:00
|
|
|
if err == nil {
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "client")
|
2018-05-11 19:52:05 +00:00
|
|
|
return
|
|
|
|
}
|
2018-05-07 15:02:33 +00:00
|
|
|
}
|
2018-05-04 20:21:17 +00:00
|
|
|
}
|
|
|
|
|
2018-04-18 22:25:11 +00:00
|
|
|
attempt++
|
2018-05-11 19:52:05 +00:00
|
|
|
if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts {
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Error("max join retry exhausted, exiting")
|
2018-04-18 16:18:18 +00:00
|
|
|
close(r.errCh)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-05-07 20:02:51 +00:00
|
|
|
if err != nil {
|
2018-09-13 17:43:40 +00:00
|
|
|
r.logger.Warn("join failed", "error", err, "retry", serverJoin.RetryInterval)
|
2018-05-07 20:02:51 +00:00
|
|
|
}
|
2018-05-25 21:12:13 +00:00
|
|
|
time.Sleep(serverJoin.RetryInterval)
|
2018-04-18 16:18:18 +00:00
|
|
|
}
|
|
|
|
}
|