395 lines
11 KiB
Go
395 lines
11 KiB
Go
package agent
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"math/rand"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
metrics "github.com/armon/go-metrics"
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/api"
|
|
"github.com/hashicorp/nomad/ci"
|
|
client "github.com/hashicorp/nomad/client/config"
|
|
"github.com/hashicorp/nomad/client/fingerprint"
|
|
"github.com/hashicorp/nomad/helper"
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
|
"github.com/hashicorp/nomad/nomad"
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
sconfig "github.com/hashicorp/nomad/nomad/structs/config"
|
|
"github.com/hashicorp/nomad/testutil"
|
|
)
|
|
|
|
// TempDir defines the base dir for temporary directories.
|
|
var TempDir = os.TempDir()
|
|
|
|
// TestAgent encapsulates an Agent with a default configuration and startup
|
|
// procedure suitable for testing. It manages a temporary data directory which
|
|
// is removed after shutdown.
|
|
type TestAgent struct {
|
|
// T is the testing object
|
|
T testing.TB
|
|
|
|
// Name is an optional name of the agent.
|
|
Name string
|
|
|
|
// ConfigCallback is an optional callback that allows modification of the
|
|
// configuration before the agent is started.
|
|
ConfigCallback func(*Config)
|
|
|
|
// Config is the agent configuration. If Config is nil then
|
|
// TestConfig() is used. If Config.DataDir is set then it is
|
|
// the callers responsibility to clean up the data directory.
|
|
// Otherwise, a temporary data directory is created and removed
|
|
// when Shutdown() is called.
|
|
Config *Config
|
|
|
|
// logger is used for logging
|
|
logger hclog.InterceptLogger
|
|
|
|
// DataDir is the data directory which is used when Config.DataDir
|
|
// is not set. It is created automatically and removed when
|
|
// Shutdown() is called.
|
|
DataDir string
|
|
|
|
// Key is the optional encryption key for the keyring.
|
|
Key string
|
|
|
|
// All HTTP servers started. Used to prevent server leaks and preserve
|
|
// backwards compatibility.
|
|
Servers []*HTTPServer
|
|
|
|
// Server is a reference to the primary, started HTTP endpoint.
|
|
// It is valid after Start().
|
|
Server *HTTPServer
|
|
|
|
// Agent is the embedded Nomad agent.
|
|
// It is valid after Start().
|
|
*Agent
|
|
|
|
// RootToken is auto-bootstrapped if ACLs are enabled
|
|
RootToken *structs.ACLToken
|
|
|
|
// ports that are reserved through freeport that must be returned at
|
|
// the end of a test, done when Shutdown() is called.
|
|
ports []int
|
|
|
|
// Enterprise specifies if the agent is enterprise or not
|
|
Enterprise bool
|
|
|
|
// shutdown is set to true if agent has been shutdown
|
|
shutdown bool
|
|
}
|
|
|
|
// NewTestAgent returns a started agent with the given name and
|
|
// configuration. The caller should call Shutdown() to stop the agent and
|
|
// remove temporary directories.
|
|
func NewTestAgent(t testing.TB, name string, configCallback func(*Config)) *TestAgent {
|
|
logger := testlog.HCLogger(t)
|
|
logger.SetLevel(testlog.HCLoggerTestLevel())
|
|
a := &TestAgent{
|
|
T: t,
|
|
Name: name,
|
|
ConfigCallback: configCallback,
|
|
Enterprise: EnterpriseTestAgent,
|
|
logger: logger,
|
|
}
|
|
a.Start()
|
|
return a
|
|
}
|
|
|
|
// Start starts a test agent.
|
|
func (a *TestAgent) Start() *TestAgent {
|
|
if a.Agent != nil {
|
|
a.T.Fatalf("TestAgent already started")
|
|
}
|
|
if a.Config == nil {
|
|
a.Config = a.config()
|
|
}
|
|
defaultEnterpriseTestServerConfig(a.Config.Server)
|
|
|
|
if a.Config.DataDir == "" {
|
|
name := "agent"
|
|
if a.Name != "" {
|
|
name = a.Name + "-agent"
|
|
}
|
|
name = strings.ReplaceAll(name, "/", "_")
|
|
d, err := ioutil.TempDir(TempDir, name)
|
|
if err != nil {
|
|
a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err)
|
|
}
|
|
a.DataDir = d
|
|
a.Config.DataDir = d
|
|
a.Config.NomadConfig.DataDir = d
|
|
}
|
|
|
|
i := 10
|
|
|
|
advertiseAddrs := *a.Config.AdvertiseAddrs
|
|
RETRY:
|
|
i--
|
|
|
|
// Clear out the advertise addresses such that through retries we
|
|
// re-normalize the addresses correctly instead of using the values from the
|
|
// last port selection that had a port conflict.
|
|
newAddrs := advertiseAddrs
|
|
a.Config.AdvertiseAddrs = &newAddrs
|
|
a.pickRandomPorts(a.Config)
|
|
if a.Config.NodeName == "" {
|
|
a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC)
|
|
}
|
|
|
|
// Create a null logger before initializing the keyring. This is typically
|
|
// done using the agent's logger. However, it hasn't been created yet.
|
|
logger := hclog.NewNullLogger()
|
|
|
|
// write the keyring
|
|
if a.Key != "" {
|
|
writeKey := func(key, filename string) {
|
|
path := filepath.Join(a.Config.DataDir, filename)
|
|
if err := initKeyring(path, key, logger); err != nil {
|
|
a.T.Fatalf("Error creating keyring %s: %s", path, err)
|
|
}
|
|
}
|
|
writeKey(a.Key, serfKeyring)
|
|
}
|
|
|
|
// we need the err var in the next exit condition
|
|
agent, err := a.start()
|
|
if err == nil {
|
|
a.Agent = agent
|
|
} else if i == 0 {
|
|
a.T.Fatalf("%s: Error starting agent: %v", a.Name, err)
|
|
} else {
|
|
|
|
if agent != nil {
|
|
agent.Shutdown()
|
|
}
|
|
wait := time.Duration(rand.Int31n(2000)) * time.Millisecond
|
|
a.T.Logf("%s: retrying in %v", a.Name, wait)
|
|
time.Sleep(wait)
|
|
|
|
// Clean out the data dir if we are responsible for it before we
|
|
// try again, since the old ports may have gotten written to
|
|
// the data dir, such as in the Raft configuration.
|
|
if a.DataDir != "" {
|
|
if err := os.RemoveAll(a.DataDir); err != nil {
|
|
a.T.Fatalf("%s: Error resetting data dir: %v", a.Name, err)
|
|
}
|
|
}
|
|
|
|
goto RETRY
|
|
}
|
|
|
|
failed := false
|
|
if a.Config.NomadConfig.BootstrapExpect == 1 && a.Config.Server.Enabled {
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
args := &structs.GenericRequest{}
|
|
var leader string
|
|
err := a.RPC("Status.Leader", args, &leader)
|
|
return leader != "", err
|
|
}, func(err error) {
|
|
a.T.Logf("failed to find leader: %v", err)
|
|
failed = true
|
|
})
|
|
} else {
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
req, _ := http.NewRequest("GET", "/v1/agent/self", nil)
|
|
resp := httptest.NewRecorder()
|
|
_, err := a.Server.AgentSelfRequest(resp, req)
|
|
return err == nil && resp.Code == 200, err
|
|
}, func(err error) {
|
|
a.T.Logf("failed to find leader: %v", err)
|
|
failed = true
|
|
})
|
|
}
|
|
if failed {
|
|
a.Agent.Shutdown()
|
|
if i == 0 {
|
|
a.T.Fatalf("ran out of retries trying to start test agent")
|
|
}
|
|
goto RETRY
|
|
}
|
|
|
|
// Check if ACLs enabled. Use special value of PolicyTTL 0s
|
|
// to do a bypass of this step. This is so we can test bootstrap
|
|
// without having to pass down a special flag.
|
|
if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 {
|
|
a.RootToken = mock.ACLManagementToken()
|
|
state := a.Agent.server.State()
|
|
if err := state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, a.RootToken); err != nil {
|
|
a.T.Fatalf("token bootstrap failed: %v", err)
|
|
}
|
|
}
|
|
return a
|
|
}
|
|
|
|
func (a *TestAgent) start() (*Agent, error) {
|
|
inm := metrics.NewInmemSink(10*time.Second, time.Minute)
|
|
metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
|
|
|
|
if inm == nil {
|
|
return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization")
|
|
}
|
|
|
|
agent, err := NewAgent(a.Config, a.logger, testlog.NewWriter(a.T), inm)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Setup the HTTP server
|
|
httpServers, err := NewHTTPServers(agent, a.Config)
|
|
if err != nil {
|
|
return agent, err
|
|
}
|
|
|
|
// TODO: investigate if there is a way to remove the requirement by updating test.
|
|
// Initial pass at implementing this is https://github.com/kevinschoonover/nomad/tree/tests.
|
|
a.Servers = httpServers
|
|
a.Server = httpServers[0]
|
|
return agent, nil
|
|
}
|
|
|
|
// Shutdown stops the agent and removes the data directory if it is
|
|
// managed by the test agent.
|
|
func (a *TestAgent) Shutdown() {
|
|
if a == nil || a.shutdown {
|
|
return
|
|
}
|
|
a.shutdown = true
|
|
|
|
defer func() {
|
|
if a.DataDir != "" {
|
|
_ = os.RemoveAll(a.DataDir)
|
|
}
|
|
}()
|
|
|
|
// shutdown agent before endpoints
|
|
ch := make(chan error, 1)
|
|
go func() {
|
|
defer close(ch)
|
|
for _, srv := range a.Servers {
|
|
srv.Shutdown()
|
|
}
|
|
|
|
ch <- a.Agent.Shutdown()
|
|
}()
|
|
|
|
// one minute grace period on shutdown
|
|
timer, cancel := helper.NewSafeTimer(1 * time.Minute)
|
|
defer cancel()
|
|
|
|
select {
|
|
case err := <-ch:
|
|
if err != nil {
|
|
a.T.Fatalf("agent shutdown error: %v", err)
|
|
}
|
|
case <-timer.C:
|
|
a.T.Fatal("agent shutdown timeout")
|
|
}
|
|
}
|
|
|
|
func (a *TestAgent) HTTPAddr() string {
|
|
if a.Server == nil {
|
|
return ""
|
|
}
|
|
proto := "http://"
|
|
if a.Config.TLSConfig != nil && a.Config.TLSConfig.EnableHTTP {
|
|
proto = "https://"
|
|
}
|
|
return proto + a.Server.Addr
|
|
}
|
|
|
|
func (a *TestAgent) Client() *api.Client {
|
|
conf := api.DefaultConfig()
|
|
conf.Address = a.HTTPAddr()
|
|
c, err := api.NewClient(conf)
|
|
if err != nil {
|
|
a.T.Fatalf("Error creating Nomad API client: %s", err)
|
|
}
|
|
return c
|
|
}
|
|
|
|
// pickRandomPorts selects random ports from fixed size random blocks of
|
|
// ports. This does not eliminate the chance for port conflict but
|
|
// reduces it significantly with little overhead. Furthermore, asking
|
|
// the kernel for a random port by binding to port 0 prolongs the test
|
|
// execution (in our case +20sec) while also not fully eliminating the
|
|
// chance of port conflicts for concurrently executed test binaries.
|
|
// Instead of relying on one set of ports to be sufficient we retry
|
|
// starting the agent with different ports on port conflict.
|
|
func (a *TestAgent) pickRandomPorts(c *Config) {
|
|
ports := ci.PortAllocator.Grab(3)
|
|
a.ports = append(a.ports, ports...)
|
|
|
|
c.Ports.HTTP = ports[0]
|
|
c.Ports.RPC = ports[1]
|
|
c.Ports.Serf = ports[2]
|
|
|
|
if err := c.normalizeAddrs(); err != nil {
|
|
a.T.Fatalf("error normalizing config: %v", err)
|
|
}
|
|
}
|
|
|
|
// TestConfig returns a unique default configuration for testing an agent.
|
|
func (a *TestAgent) config() *Config {
|
|
conf := DevConfig(nil)
|
|
|
|
// Customize the server configuration
|
|
config := nomad.DefaultConfig()
|
|
conf.NomadConfig = config
|
|
|
|
// Setup client config
|
|
conf.ClientConfig = client.DefaultConfig()
|
|
|
|
conf.LogLevel = testlog.HCLoggerTestLevel().String()
|
|
conf.NomadConfig.Logger = a.logger
|
|
conf.ClientConfig.Logger = a.logger
|
|
|
|
// Set the name
|
|
conf.NodeName = a.Name
|
|
|
|
// Bind and set ports
|
|
conf.BindAddr = "127.0.0.1"
|
|
|
|
conf.Consul = sconfig.DefaultConsulConfig()
|
|
conf.Vault.Enabled = new(bool)
|
|
|
|
// Tighten the Serf timing
|
|
config.SerfConfig.MemberlistConfig.SuspicionMult = 2
|
|
config.SerfConfig.MemberlistConfig.RetransmitMult = 2
|
|
config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond
|
|
config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond
|
|
config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
|
|
|
|
// Tighten the Raft timing
|
|
config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond
|
|
config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
|
|
config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
|
|
config.RaftTimeout = 500 * time.Millisecond
|
|
|
|
// Tighten the autopilot timing
|
|
config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
|
|
config.ServerHealthInterval = 50 * time.Millisecond
|
|
config.AutopilotInterval = 100 * time.Millisecond
|
|
|
|
// Tighten the fingerprinter timeouts
|
|
if conf.Client.Options == nil {
|
|
conf.Client.Options = make(map[string]string)
|
|
}
|
|
conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true"
|
|
|
|
if a.ConfigCallback != nil {
|
|
a.ConfigCallback(conf)
|
|
}
|
|
|
|
return conf
|
|
}
|