open-nomad/e2e/e2eutil/utils.go

395 lines
12 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package e2eutil
import (
"bytes"
"fmt"
"os"
"testing"
"text/template"
"time"
api "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/jobspec2"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
)
// retries is used to control how many times to retry checking if the cluster has a leader yet
const retries = 500
func WaitForLeader(t *testing.T, nomadClient *api.Client) {
statusAPI := nomadClient.Status()
testutil.WaitForResultRetries(retries, func() (bool, error) {
leader, err := statusAPI.Leader()
return leader != "", err
}, func(err error) {
require.NoError(t, err, "failed to find leader")
})
}
// WaitForNodesReady waits until at least `nodes` number of nodes are ready or
// fails the test.
func WaitForNodesReady(t *testing.T, nomadClient *api.Client, nodes int) {
nodesAPI := nomadClient.Nodes()
testutil.WaitForResultRetries(retries, func() (bool, error) {
defer time.Sleep(time.Millisecond * 100)
nodesList, _, err := nodesAPI.List(nil)
if err != nil {
return false, fmt.Errorf("error listing nodes: %v", err)
}
eligibleNodes := 0
for _, node := range nodesList {
if node.Status == "ready" {
eligibleNodes++
}
}
return eligibleNodes >= nodes, fmt.Errorf("only %d nodes ready (wanted at least %d)", eligibleNodes, nodes)
}, func(err error) {
require.NoError(t, err, "failed to get enough ready nodes")
})
}
func stringToPtrOrNil(s string) *string {
if s == "" {
return nil
}
return pointer.Of(s)
}
func Parse2(t *testing.T, jobFile string) (*api.Job, error) {
f, err := os.Open(jobFile)
require.NoError(t, err)
return jobspec2.Parse(jobFile, f)
}
func RegisterAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
// Parse job
job, err := Parse2(t, jobFile)
require.NoError(t, err)
// Set custom job ID (distinguish among tests)
job.ID = pointer.Of(jobID)
// Set a Consul "operator" token for the job, if provided.
job.ConsulToken = stringToPtrOrNil(cToken)
// Register job
var idx uint64
jobs := nomadClient.Jobs()
testutil.WaitForResult(func() (bool, error) {
resp, meta, err := jobs.Register(job, nil)
if err != nil {
return false, err
}
idx = meta.LastIndex
return resp.EvalID != "", fmt.Errorf("expected EvalID:%s", pretty.Sprint(resp))
}, func(err error) {
require.NoError(t, err)
})
allocs, _, err := jobs.Allocations(jobID, false, &api.QueryOptions{WaitIndex: idx})
require.NoError(t, err)
return allocs
}
// RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
// successfully create Allocs.
func RegisterAndWaitForAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
jobs := nomadClient.Jobs()
// Start allocations
RegisterAllocs(t, nomadClient, jobFile, jobID, cToken)
var err error
allocs := []*api.AllocationListStub{}
evals := []*api.Evaluation{}
// Wrap in retry to wait until placement
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Second)
allocs, _, err = jobs.Allocations(jobID, false, nil)
if len(allocs) == 0 {
evals, _, err = nomadClient.Jobs().Evaluations(jobID, nil)
return false, fmt.Errorf("no allocations for job %v", jobID)
}
return true, nil
}, func(e error) {
msg := fmt.Sprintf("allocations not placed for %s", jobID)
for _, eval := range evals {
msg += fmt.Sprintf("\n %s - %s", eval.Status, eval.StatusDescription)
}
require.Fail(t, msg, "full evals: %v", pretty.Sprint(evals))
})
require.NoError(t, err) // we only care about the last error
return allocs
}
func WaitForAllocRunning(t *testing.T, nomadClient *api.Client, allocID string) {
t.Helper()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}
return alloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf("expected status running, but was: %s\n%v", alloc.ClientStatus, pretty.Sprint(alloc))
}, func(err error) {
require.NoError(t, err, "failed to wait on alloc")
})
}
func WaitForAllocTaskRunning(t *testing.T, nomadClient *api.Client, allocID, task string) {
WaitForAllocTaskState(t, nomadClient, allocID, task, structs.TaskStateRunning)
}
func WaitForAllocTaskComplete(t *testing.T, nomadClient *api.Client, allocID, task string) {
WaitForAllocTaskState(t, nomadClient, allocID, task, structs.TaskStateDead)
}
func WaitForAllocTaskState(t *testing.T, nomadClient *api.Client, allocID, task, state string) {
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 500)
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}
currentState := "n/a"
if taskState := alloc.TaskStates[task]; taskState != nil {
currentState = taskState.State
}
return currentState == state, fmt.Errorf("expected status %s, but was: %s", state, currentState)
}, func(err error) {
t.Fatalf("failed to wait on alloc task: %v", err)
})
}
func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
for _, allocID := range allocIDs {
WaitForAllocRunning(t, nomadClient, allocID)
}
}
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
for _, allocID := range allocIDs {
WaitForAllocNotPending(t, nomadClient, allocID)
}
}
func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}
return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
}, func(err error) {
require.NoError(t, err, "failed to wait on alloc")
})
}
// WaitForJobStopped stops a job and waits for all of its allocs to terminate.
func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
allocs, _, err := nomadClient.Jobs().Allocations(job, true, nil)
require.NoError(t, err, "error getting allocations for job %q", job)
ids := AllocIDsFromAllocationListStubs(allocs)
_, _, err = nomadClient.Jobs().Deregister(job, true, nil)
require.NoError(t, err, "error deregistering job %q", job)
for _, id := range ids {
WaitForAllocStopped(t, nomadClient, id)
}
}
func WaitForAllocsStopped(t *testing.T, nomadClient *api.Client, allocIDs []string) {
for _, allocID := range allocIDs {
WaitForAllocStopped(t, nomadClient, allocID)
}
}
func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) *api.Allocation {
var alloc *api.Allocation
var err error
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
alloc, _, err = nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}
switch alloc.ClientStatus {
case structs.AllocClientStatusComplete:
return true, nil
case structs.AllocClientStatusFailed:
return true, nil
case structs.AllocClientStatusLost:
return true, nil
default:
return false, fmt.Errorf("expected stopped alloc, but was: %s",
alloc.ClientStatus)
}
}, func(err error) {
require.NoError(t, err, "failed to wait on alloc")
})
return alloc
}
func WaitForAllocStatus(t *testing.T, nomadClient *api.Client, allocID string, status string) {
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
if err != nil {
return false, err
}
switch alloc.ClientStatus {
case status:
return true, nil
default:
return false, fmt.Errorf("expected %s alloc, but was: %s", status, alloc.ClientStatus)
}
}, func(err error) {
t.Fatalf("failed to wait on alloc: %v", err)
})
}
func WaitForAllocsStatus(t *testing.T, nomadClient *api.Client, allocIDs []string, status string) {
for _, allocID := range allocIDs {
WaitForAllocStatus(t, nomadClient, allocID, status)
}
}
func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
allocIDs := make([]string, 0, len(allocs))
for _, alloc := range allocs {
allocIDs = append(allocIDs, alloc.ID)
}
return allocIDs
}
func DeploymentsForJob(t *testing.T, nomadClient *api.Client, jobID string) []*api.Deployment {
ds, _, err := nomadClient.Deployments().List(nil)
require.NoError(t, err)
out := []*api.Deployment{}
for _, d := range ds {
if d.JobID == jobID {
out = append(out, d)
}
}
return out
}
func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, status string, statusDesc string) {
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
deploy, _, err := nomadClient.Deployments().Info(deployID, nil)
if err != nil {
return false, err
}
if deploy.Status == status && deploy.StatusDescription == statusDesc {
return true, nil
}
return false, fmt.Errorf("expected status %s \"%s\", but got: %s \"%s\"",
status,
statusDesc,
deploy.Status,
deploy.StatusDescription,
)
}, func(err error) {
require.NoError(t, err, "failed to wait on deployment")
})
}
// DumpEvals for a job. This is intended to be used during test development or
// prior to exiting a test after an assertion failed.
func DumpEvals(c *api.Client, jobID string) string {
evals, _, err := c.Jobs().Evaluations(jobID, nil)
if err != nil {
return fmt.Sprintf("error retrieving evals for job %q: %s", jobID, err)
}
if len(evals) == 0 {
return fmt.Sprintf("no evals found for job %q", jobID)
}
buf := bytes.NewBuffer(nil)
for i, e := range evals {
err := EvalTemplate.Execute(buf, map[string]interface{}{
"Index": i + 1,
"Total": len(evals),
"Eval": e,
})
if err != nil {
fmt.Fprintf(buf, "error rendering eval: %s\n", err)
}
}
return buf.String()
}
var EvalTemplate = template.Must(template.New("dump_eval").Parse(
`{{.Index}}/{{.Total}} Job {{.Eval.JobID}} Eval {{.Eval.ID}}
Type: {{.Eval.Type}}
TriggeredBy: {{.Eval.TriggeredBy}}
{{- if .Eval.DeploymentID}}
Deployment: {{.Eval.DeploymentID}}
{{- end}}
Status: {{.Eval.Status}} {{if .Eval.StatusDescription}}({{.Eval.StatusDescription}}){{end}}
{{- if .Eval.Wait}}
Wait: {{.Eval.Wait}} <- DEPRECATED
{{- end}}
{{- if not .Eval.WaitUntil.IsZero}}
WaitUntil: {{.Eval.WaitUntil}}
{{- end}}
{{- if .Eval.NextEval}}
NextEval: {{.Eval.NextEval}}
{{- end}}
{{- if .Eval.PreviousEval}}
PrevEval: {{.Eval.PreviousEval}}
{{- end}}
{{- if .Eval.BlockedEval}}
BlockedEval: {{.Eval.BlockedEval}}
{{- end}}
{{- if .Eval.FailedTGAllocs }}
Failed Allocs:
{{- end}}
{{- range $k, $v := .Eval.FailedTGAllocs}}
Failed Group: {{$k}}
NodesEvaluated: {{$v.NodesEvaluated}}
NodesFiltered: {{$v.NodesFiltered}}
NodesAvailable: {{range $dc, $n := $v.NodesAvailable}}{{$dc}}:{{$n}} {{end}}
NodesExhausted: {{$v.NodesExhausted}}
ClassFiltered: {{len $v.ClassFiltered}}
ConstraintFilt: {{len $v.ConstraintFiltered}}
DimensionExhst: {{range $d, $n := $v.DimensionExhausted}}{{$d}}:{{$n}} {{end}}
ResourcesExhst: {{range $r, $n := $v.ResourcesExhausted}}{{$r}}:{{$n}} {{end}}
QuotaExhausted: {{range $i, $q := $v.QuotaExhausted}}{{$q}} {{end}}
CoalescedFail: {{$v.CoalescedFailures}}
ScoreMetaData: {{len $v.ScoreMetaData}}
AllocationTime: {{$v.AllocationTime}}
{{- end}}
{{- if .Eval.QueuedAllocations}}
QueuedAllocs: {{range $k, $n := .Eval.QueuedAllocations}}{{$k}}:{{$n}} {{end}}
{{- end}}
SnapshotIdx: {{.Eval.SnapshotIndex}}
CreateIndex: {{.Eval.CreateIndex}}
ModifyIndex: {{.Eval.ModifyIndex}}
`))