2018-12-20 03:25:32 +00:00
|
|
|
package e2eutil
|
|
|
|
|
|
|
|
import (
|
2019-01-08 22:37:08 +00:00
|
|
|
"fmt"
|
2020-06-19 18:03:10 +00:00
|
|
|
"strings"
|
2019-01-04 13:53:50 +00:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2020-06-19 18:03:10 +00:00
|
|
|
consulapi "github.com/hashicorp/consul/api"
|
2018-12-20 03:25:32 +00:00
|
|
|
"github.com/hashicorp/nomad/api"
|
|
|
|
"github.com/hashicorp/nomad/helper"
|
|
|
|
"github.com/hashicorp/nomad/jobspec"
|
2019-01-18 04:32:45 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2018-12-20 03:25:32 +00:00
|
|
|
"github.com/hashicorp/nomad/testutil"
|
2019-01-26 00:51:20 +00:00
|
|
|
"github.com/kr/pretty"
|
2020-04-03 19:52:58 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2018-12-20 03:25:32 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
)
|
|
|
|
|
|
|
|
// retries is the attempt budget handed to testutil.WaitForResultRetries by the
// wait helpers in this file (leader, ready nodes, allocation status and
// deployment status), not only by the leader check.
|
|
|
|
const retries = 500
|
|
|
|
|
|
|
|
func WaitForLeader(t *testing.T, nomadClient *api.Client) {
|
|
|
|
statusAPI := nomadClient.Status()
|
|
|
|
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
|
|
|
leader, err := statusAPI.Leader()
|
|
|
|
return leader != "", err
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("failed to find leader: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2019-01-08 22:37:08 +00:00
|
|
|
// WaitForNodesReady waits until at least `nodes` number of nodes are ready or
|
|
|
|
// fails the test.
|
2019-01-03 22:16:20 +00:00
|
|
|
func WaitForNodesReady(t *testing.T, nomadClient *api.Client, nodes int) {
|
|
|
|
nodesAPI := nomadClient.Nodes()
|
|
|
|
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
2019-01-18 04:32:45 +00:00
|
|
|
defer time.Sleep(time.Millisecond * 100)
|
2019-01-03 22:16:20 +00:00
|
|
|
nodesList, _, err := nodesAPI.List(nil)
|
2019-01-08 22:37:08 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, fmt.Errorf("error listing nodes: %v", err)
|
|
|
|
}
|
|
|
|
|
2019-01-03 22:16:20 +00:00
|
|
|
eligibleNodes := 0
|
|
|
|
for _, node := range nodesList {
|
|
|
|
if node.Status == "ready" {
|
|
|
|
eligibleNodes++
|
|
|
|
}
|
|
|
|
}
|
2019-01-08 22:37:08 +00:00
|
|
|
|
|
|
|
return eligibleNodes >= nodes, fmt.Errorf("only %d nodes ready (wanted at least %d)", eligibleNodes, nodes)
|
2019-01-03 22:16:20 +00:00
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("failed to get enough ready nodes: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-01-28 22:33:59 +00:00
|
|
|
// stringToPtrOrNil returns nil for the empty string and otherwise a pointer
// to a copy of s.
func stringToPtrOrNil(s string) *string {
	if s != "" {
		// s is this call's own copy, so the pointer is not shared with
		// the caller's variable.
		return &s
	}
	return nil
}
|
|
|
|
|
|
|
|
func RegisterAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
|
|
|
|
|
2018-12-20 03:25:32 +00:00
|
|
|
// Parse job
|
|
|
|
job, err := jobspec.ParseFile(jobFile)
|
2020-04-03 14:22:22 +00:00
|
|
|
require.NoError(t, err)
|
2020-01-28 22:33:59 +00:00
|
|
|
|
|
|
|
// Set custom job ID (distinguish among tests)
|
2019-01-03 22:16:20 +00:00
|
|
|
job.ID = helper.StringToPtr(jobID)
|
2018-12-20 03:25:32 +00:00
|
|
|
|
2020-01-28 22:33:59 +00:00
|
|
|
// Set a Consul "operator" token for the job, if provided.
|
|
|
|
job.ConsulToken = stringToPtrOrNil(cToken)
|
|
|
|
|
2018-12-20 03:25:32 +00:00
|
|
|
// Register job
|
2019-09-10 17:45:16 +00:00
|
|
|
var idx uint64
|
2018-12-20 03:25:32 +00:00
|
|
|
jobs := nomadClient.Jobs()
|
2019-01-26 00:51:20 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2019-09-10 17:45:16 +00:00
|
|
|
resp, meta, err := jobs.Register(job, nil)
|
2019-01-26 00:51:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2019-09-10 17:45:16 +00:00
|
|
|
idx = meta.LastIndex
|
2019-01-26 00:51:20 +00:00
|
|
|
return resp.EvalID != "", fmt.Errorf("expected EvalID:%s", pretty.Sprint(resp))
|
|
|
|
}, func(err error) {
|
2020-04-03 14:22:22 +00:00
|
|
|
require.NoError(t, err)
|
2019-01-26 00:51:20 +00:00
|
|
|
})
|
2018-12-20 03:25:32 +00:00
|
|
|
|
2020-01-28 22:33:59 +00:00
|
|
|
allocs, _, err := jobs.Allocations(jobID, false, &api.QueryOptions{WaitIndex: idx})
|
|
|
|
require.NoError(t, err)
|
2019-05-20 14:54:28 +00:00
|
|
|
return allocs
|
|
|
|
}
|
|
|
|
|
2020-08-18 22:37:02 +00:00
|
|
|
// RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
|
|
|
|
// successfully create Allocs.
|
2020-01-28 22:33:59 +00:00
|
|
|
func RegisterAndWaitForAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
|
2019-05-20 14:54:28 +00:00
|
|
|
jobs := nomadClient.Jobs()
|
|
|
|
|
2019-05-20 19:10:32 +00:00
|
|
|
// Start allocations
|
2020-01-28 22:33:59 +00:00
|
|
|
RegisterAllocs(t, nomadClient, jobFile, jobID, cToken)
|
2020-04-03 19:52:58 +00:00
|
|
|
|
2020-04-03 14:22:22 +00:00
|
|
|
var err error
|
2020-04-03 19:52:58 +00:00
|
|
|
allocs := []*api.AllocationListStub{}
|
|
|
|
evals := []*api.Evaluation{}
|
2019-05-20 19:10:32 +00:00
|
|
|
|
2018-12-20 03:25:32 +00:00
|
|
|
// Wrap in retry to wait until placement
|
2020-04-03 19:52:58 +00:00
|
|
|
ok := assert.Eventually(t, func() bool {
|
2020-04-03 14:22:22 +00:00
|
|
|
allocs, _, err = jobs.Allocations(jobID, false, nil)
|
2020-04-03 19:52:58 +00:00
|
|
|
if len(allocs) < 1 {
|
|
|
|
evals, _, err = nomadClient.Jobs().Evaluations(jobID, nil)
|
|
|
|
}
|
2020-04-03 14:22:22 +00:00
|
|
|
return len(allocs) > 0
|
|
|
|
}, 30*time.Second, time.Second)
|
|
|
|
|
2020-04-03 19:52:58 +00:00
|
|
|
msg := fmt.Sprintf("allocations not placed for %s", jobID)
|
|
|
|
if !ok && len(evals) > 0 {
|
|
|
|
for _, eval := range evals {
|
|
|
|
msg += fmt.Sprintf("\n %s - %s", eval.Status, eval.StatusDescription)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
require.Truef(t, ok, msg)
|
|
|
|
require.NoError(t, err) // we only care about the last error
|
2019-01-03 22:16:20 +00:00
|
|
|
return allocs
|
2018-12-20 03:25:32 +00:00
|
|
|
}
|
2019-01-18 04:32:45 +00:00
|
|
|
|
|
|
|
func WaitForAllocRunning(t *testing.T, nomadClient *api.Client, allocID string) {
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
2019-01-23 19:09:49 +00:00
|
|
|
time.Sleep(time.Millisecond * 100)
|
2019-01-18 04:32:45 +00:00
|
|
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return alloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf("expected status running, but was: %s", alloc.ClientStatus)
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("failed to wait on alloc: %v", err)
|
|
|
|
})
|
|
|
|
}
|
2019-05-20 14:54:28 +00:00
|
|
|
|
2020-01-28 22:33:59 +00:00
|
|
|
func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
|
|
|
|
for _, allocID := range allocIDs {
|
|
|
|
WaitForAllocRunning(t, nomadClient, allocID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-04 16:55:50 +00:00
|
|
|
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
|
|
|
|
for _, allocID := range allocIDs {
|
|
|
|
WaitForAllocNotPending(t, nomadClient, allocID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
|
|
|
time.Sleep(time.Millisecond * 100)
|
|
|
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
2020-02-04 18:59:39 +00:00
|
|
|
return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
|
2020-02-04 16:55:50 +00:00
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("failed to wait on alloc: %v", err)
|
|
|
|
})
|
|
|
|
}
|
2020-08-18 22:37:02 +00:00
|
|
|
|
|
|
|
// WaitForJobStopped stops a job and waits for all of its allocs to terminate.
|
|
|
|
func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
|
|
|
|
allocs, _, err := nomadClient.Jobs().Allocations(job, true, nil)
|
|
|
|
require.NoError(t, err, "error getting allocations for job %q", job)
|
|
|
|
ids := AllocIDsFromAllocationListStubs(allocs)
|
|
|
|
_, _, err = nomadClient.Jobs().Deregister(job, true, nil)
|
|
|
|
require.NoError(t, err, "error deregistering job %q", job)
|
|
|
|
for _, id := range ids {
|
|
|
|
WaitForAllocStopped(t, nomadClient, id)
|
|
|
|
}
|
|
|
|
}
|
2020-02-04 16:55:50 +00:00
|
|
|
|
2020-04-05 18:52:08 +00:00
|
|
|
func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) {
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
|
|
|
time.Sleep(time.Millisecond * 100)
|
|
|
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
switch alloc.ClientStatus {
|
|
|
|
case structs.AllocClientStatusComplete:
|
|
|
|
return true, nil
|
|
|
|
case structs.AllocClientStatusFailed:
|
|
|
|
return true, nil
|
|
|
|
case structs.AllocClientStatusLost:
|
|
|
|
return true, nil
|
|
|
|
default:
|
|
|
|
return false, fmt.Errorf("expected stopped alloc, but was: %s",
|
|
|
|
alloc.ClientStatus)
|
|
|
|
}
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("failed to wait on alloc: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-01-28 22:33:59 +00:00
|
|
|
func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
|
|
|
|
allocIDs := make([]string, 0, len(allocs))
|
|
|
|
for _, alloc := range allocs {
|
|
|
|
allocIDs = append(allocIDs, alloc.ID)
|
|
|
|
}
|
|
|
|
return allocIDs
|
|
|
|
}
|
|
|
|
|
2019-06-04 18:31:42 +00:00
|
|
|
func DeploymentsForJob(t *testing.T, nomadClient *api.Client, jobID string) []*api.Deployment {
|
2019-06-04 15:25:18 +00:00
|
|
|
ds, _, err := nomadClient.Deployments().List(nil)
|
2019-06-04 18:31:42 +00:00
|
|
|
require.NoError(t, err)
|
2019-06-04 15:25:18 +00:00
|
|
|
|
|
|
|
out := []*api.Deployment{}
|
|
|
|
for _, d := range ds {
|
|
|
|
if d.JobID == jobID {
|
|
|
|
out = append(out, d)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
2019-05-20 14:54:28 +00:00
|
|
|
func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, status string, statusDesc string) {
|
|
|
|
testutil.WaitForResultRetries(retries, func() (bool, error) {
|
|
|
|
time.Sleep(time.Millisecond * 100)
|
|
|
|
deploy, _, err := nomadClient.Deployments().Info(deployID, nil)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if deploy.Status == status && deploy.StatusDescription == statusDesc {
|
|
|
|
return true, nil
|
|
|
|
}
|
2019-05-20 17:27:28 +00:00
|
|
|
return false, fmt.Errorf("expected status %s \"%s\", but got: %s \"%s\"",
|
|
|
|
status,
|
|
|
|
statusDesc,
|
2019-06-04 15:25:18 +00:00
|
|
|
deploy.Status,
|
|
|
|
deploy.StatusDescription,
|
2019-05-20 17:27:28 +00:00
|
|
|
)
|
2019-05-20 14:54:28 +00:00
|
|
|
|
|
|
|
}, func(err error) {
|
2019-05-21 13:21:44 +00:00
|
|
|
t.Fatalf("failed to wait on deployment: %v", err)
|
2019-05-20 14:54:28 +00:00
|
|
|
})
|
|
|
|
}
|
2020-06-19 18:03:10 +00:00
|
|
|
|
|
|
|
// CheckServicesPassing scans for passing agent checks via the given agent API
|
|
|
|
// client.
|
|
|
|
//
|
|
|
|
// Deprecated: not useful in e2e, where more than one node exists and Nomad jobs
|
|
|
|
// are placed non-deterministically. The Consul agentAPI only knows about what
|
|
|
|
// is registered on its node, and cannot be used to query for cluster wide state.
|
|
|
|
func CheckServicesPassing(t *testing.T, agentAPI *consulapi.Agent, allocIDs []string) {
|
|
|
|
failing := map[string]*consulapi.AgentCheck{}
|
|
|
|
for i := 0; i < 60; i++ {
|
|
|
|
checks, err := agentAPI.Checks()
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Filter out checks for other services
|
|
|
|
for cid, check := range checks {
|
|
|
|
found := false
|
|
|
|
for _, allocID := range allocIDs {
|
|
|
|
if strings.Contains(check.ServiceID, allocID) {
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !found {
|
|
|
|
delete(checks, cid)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ensure checks are all passing
|
|
|
|
failing = map[string]*consulapi.AgentCheck{}
|
|
|
|
for _, check := range checks {
|
|
|
|
if check.Status != "passing" {
|
|
|
|
failing[check.CheckID] = check
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(failing) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
t.Logf("still %d checks not passing", len(failing))
|
|
|
|
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
}
|
|
|
|
require.Len(t, failing, 0, pretty.Sprint(failing))
|
|
|
|
}
|