2023-04-10 15:36:59 +00:00
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
2018-12-20 03:25:32 +00:00
package e2eutil
import (
2022-10-14 21:15:07 +00:00
"bytes"
2019-01-08 22:37:08 +00:00
"fmt"
2021-01-13 20:00:40 +00:00
"os"
2019-01-04 13:53:50 +00:00
"testing"
2022-10-14 21:15:07 +00:00
"text/template"
2019-01-04 13:53:50 +00:00
"time"
2021-04-14 23:02:42 +00:00
api "github.com/hashicorp/nomad/api"
2022-08-17 16:26:34 +00:00
"github.com/hashicorp/nomad/helper/pointer"
2021-01-13 20:00:40 +00:00
"github.com/hashicorp/nomad/jobspec2"
2019-01-18 04:32:45 +00:00
"github.com/hashicorp/nomad/nomad/structs"
2018-12-20 03:25:32 +00:00
"github.com/hashicorp/nomad/testutil"
2019-01-26 00:51:20 +00:00
"github.com/kr/pretty"
2018-12-20 03:25:32 +00:00
"github.com/stretchr/testify/require"
)
// retries is the number of attempts passed to testutil.WaitForResultRetries by
// the polling helpers in this file, including the check for whether the
// cluster has a leader yet.
const retries = 500
func WaitForLeader ( t * testing . T , nomadClient * api . Client ) {
statusAPI := nomadClient . Status ( )
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
leader , err := statusAPI . Leader ( )
return leader != "" , err
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to find leader" )
2018-12-20 03:25:32 +00:00
} )
}
2019-01-08 22:37:08 +00:00
// WaitForNodesReady waits until at least `nodes` number of nodes are ready or
// fails the test.
2019-01-03 22:16:20 +00:00
func WaitForNodesReady ( t * testing . T , nomadClient * api . Client , nodes int ) {
nodesAPI := nomadClient . Nodes ( )
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
2019-01-18 04:32:45 +00:00
defer time . Sleep ( time . Millisecond * 100 )
2019-01-03 22:16:20 +00:00
nodesList , _ , err := nodesAPI . List ( nil )
2019-01-08 22:37:08 +00:00
if err != nil {
return false , fmt . Errorf ( "error listing nodes: %v" , err )
}
2019-01-03 22:16:20 +00:00
eligibleNodes := 0
for _ , node := range nodesList {
if node . Status == "ready" {
eligibleNodes ++
}
}
2019-01-08 22:37:08 +00:00
return eligibleNodes >= nodes , fmt . Errorf ( "only %d nodes ready (wanted at least %d)" , eligibleNodes , nodes )
2019-01-03 22:16:20 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to get enough ready nodes" )
2019-01-03 22:16:20 +00:00
} )
}
2020-01-28 22:33:59 +00:00
func stringToPtrOrNil ( s string ) * string {
if s == "" {
return nil
}
2022-08-17 16:26:34 +00:00
return pointer . Of ( s )
2020-01-28 22:33:59 +00:00
}
2021-01-13 20:00:40 +00:00
// Parse2 opens the job specification at jobFile and parses it with
// jobspec2.Parse, returning the parsed job and any parse error. A failure to
// open the file fails the test immediately.
func Parse2(t *testing.T, jobFile string) (*api.Job, error) {
	f, err := os.Open(jobFile)
	require.NoError(t, err)
	// Release the file handle once parsing is done so it is not leaked. The
	// file is only read, so the Close error carries nothing actionable.
	defer f.Close()

	return jobspec2.Parse(jobFile, f)
}
2020-01-28 22:33:59 +00:00
func RegisterAllocs ( t * testing . T , nomadClient * api . Client , jobFile , jobID , cToken string ) [ ] * api . AllocationListStub {
2018-12-20 03:25:32 +00:00
// Parse job
2021-01-13 20:00:40 +00:00
job , err := Parse2 ( t , jobFile )
2020-04-03 14:22:22 +00:00
require . NoError ( t , err )
2020-01-28 22:33:59 +00:00
// Set custom job ID (distinguish among tests)
2022-08-17 16:26:34 +00:00
job . ID = pointer . Of ( jobID )
2018-12-20 03:25:32 +00:00
2020-01-28 22:33:59 +00:00
// Set a Consul "operator" token for the job, if provided.
job . ConsulToken = stringToPtrOrNil ( cToken )
2018-12-20 03:25:32 +00:00
// Register job
2019-09-10 17:45:16 +00:00
var idx uint64
2018-12-20 03:25:32 +00:00
jobs := nomadClient . Jobs ( )
2019-01-26 00:51:20 +00:00
testutil . WaitForResult ( func ( ) ( bool , error ) {
2019-09-10 17:45:16 +00:00
resp , meta , err := jobs . Register ( job , nil )
2019-01-26 00:51:20 +00:00
if err != nil {
return false , err
}
2019-09-10 17:45:16 +00:00
idx = meta . LastIndex
2019-01-26 00:51:20 +00:00
return resp . EvalID != "" , fmt . Errorf ( "expected EvalID:%s" , pretty . Sprint ( resp ) )
} , func ( err error ) {
2020-04-03 14:22:22 +00:00
require . NoError ( t , err )
2019-01-26 00:51:20 +00:00
} )
2018-12-20 03:25:32 +00:00
2020-01-28 22:33:59 +00:00
allocs , _ , err := jobs . Allocations ( jobID , false , & api . QueryOptions { WaitIndex : idx } )
require . NoError ( t , err )
2019-05-20 14:54:28 +00:00
return allocs
}
2020-08-18 22:37:02 +00:00
// RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
// successfully create Allocs.
2020-01-28 22:33:59 +00:00
func RegisterAndWaitForAllocs ( t * testing . T , nomadClient * api . Client , jobFile , jobID , cToken string ) [ ] * api . AllocationListStub {
2019-05-20 14:54:28 +00:00
jobs := nomadClient . Jobs ( )
2019-05-20 19:10:32 +00:00
// Start allocations
2020-01-28 22:33:59 +00:00
RegisterAllocs ( t , nomadClient , jobFile , jobID , cToken )
2020-04-03 19:52:58 +00:00
2020-04-03 14:22:22 +00:00
var err error
2020-04-03 19:52:58 +00:00
allocs := [ ] * api . AllocationListStub { }
evals := [ ] * api . Evaluation { }
2019-05-20 19:10:32 +00:00
2018-12-20 03:25:32 +00:00
// Wrap in retry to wait until placement
2021-01-26 14:24:55 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Second )
2020-04-03 14:22:22 +00:00
allocs , _ , err = jobs . Allocations ( jobID , false , nil )
2021-01-26 14:24:55 +00:00
if len ( allocs ) == 0 {
2020-04-03 19:52:58 +00:00
evals , _ , err = nomadClient . Jobs ( ) . Evaluations ( jobID , nil )
2021-01-26 14:24:55 +00:00
return false , fmt . Errorf ( "no allocations for job %v" , jobID )
2020-04-03 19:52:58 +00:00
}
2020-04-03 14:22:22 +00:00
2021-01-26 14:24:55 +00:00
return true , nil
} , func ( e error ) {
msg := fmt . Sprintf ( "allocations not placed for %s" , jobID )
2020-04-03 19:52:58 +00:00
for _ , eval := range evals {
msg += fmt . Sprintf ( "\n %s - %s" , eval . Status , eval . StatusDescription )
}
2021-01-26 14:24:55 +00:00
require . Fail ( t , msg , "full evals: %v" , pretty . Sprint ( evals ) )
} )
2020-04-03 19:52:58 +00:00
require . NoError ( t , err ) // we only care about the last error
2021-01-26 14:24:55 +00:00
2019-01-03 22:16:20 +00:00
return allocs
2018-12-20 03:25:32 +00:00
}
2019-01-18 04:32:45 +00:00
func WaitForAllocRunning ( t * testing . T , nomadClient * api . Client , allocID string ) {
2021-01-26 14:24:55 +00:00
t . Helper ( )
2019-01-18 04:32:45 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
2019-01-23 19:09:49 +00:00
time . Sleep ( time . Millisecond * 100 )
2019-01-18 04:32:45 +00:00
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
2021-01-26 14:24:55 +00:00
return alloc . ClientStatus == structs . AllocClientStatusRunning , fmt . Errorf ( "expected status running, but was: %s\n%v" , alloc . ClientStatus , pretty . Sprint ( alloc ) )
2019-01-18 04:32:45 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2019-01-18 04:32:45 +00:00
} )
}
2019-05-20 14:54:28 +00:00
2021-01-27 14:44:01 +00:00
func WaitForAllocTaskRunning ( t * testing . T , nomadClient * api . Client , allocID , task string ) {
2022-11-04 14:50:11 +00:00
WaitForAllocTaskState ( t , nomadClient , allocID , task , structs . TaskStateRunning )
}
// WaitForAllocTaskComplete waits for the named task of the allocation to reach
// structs.TaskStateDead by delegating to WaitForAllocTaskState.
func WaitForAllocTaskComplete(t *testing.T, nomadClient *api.Client, allocID, task string) {
	wanted := structs.TaskStateDead
	WaitForAllocTaskState(t, nomadClient, allocID, task, wanted)
}
func WaitForAllocTaskState ( t * testing . T , nomadClient * api . Client , allocID , task , state string ) {
2021-01-27 14:44:01 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
2022-11-04 14:50:11 +00:00
time . Sleep ( time . Millisecond * 500 )
2021-01-27 14:44:01 +00:00
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
2022-11-04 14:50:11 +00:00
currentState := "n/a"
if taskState := alloc . TaskStates [ task ] ; taskState != nil {
currentState = taskState . State
2021-01-27 14:44:01 +00:00
}
2022-11-04 14:50:11 +00:00
return currentState == state , fmt . Errorf ( "expected status %s, but was: %s" , state , currentState )
2021-01-27 14:44:01 +00:00
} , func ( err error ) {
2022-11-04 14:50:11 +00:00
t . Fatalf ( "failed to wait on alloc task: %v" , err )
2021-01-27 14:44:01 +00:00
} )
}
2020-01-28 22:33:59 +00:00
// WaitForAllocsRunning calls WaitForAllocRunning for each allocation ID, in
// order.
func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for i := range allocIDs {
		WaitForAllocRunning(t, nomadClient, allocIDs[i])
	}
}
2020-02-04 16:55:50 +00:00
// WaitForAllocsNotPending calls WaitForAllocNotPending for each allocation ID,
// in order.
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for i := range allocIDs {
		WaitForAllocNotPending(t, nomadClient, allocIDs[i])
	}
}
func WaitForAllocNotPending ( t * testing . T , nomadClient * api . Client , allocID string ) {
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
2020-02-04 18:59:39 +00:00
return alloc . ClientStatus != structs . AllocClientStatusPending , fmt . Errorf ( "expected status not pending, but was: %s" , alloc . ClientStatus )
2020-02-04 16:55:50 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2020-02-04 16:55:50 +00:00
} )
}
2020-08-18 22:37:02 +00:00
// WaitForJobStopped deregisters the job and then waits for every allocation
// that was listed for it beforehand to stop. Errors from listing or
// deregistering fail the test.
func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
	jobsAPI := nomadClient.Jobs()

	// Capture the allocation IDs before the job is deregistered.
	stubs, _, err := jobsAPI.Allocations(job, true, nil)
	require.NoError(t, err, "error getting allocations for job %q", job)
	stoppedIDs := AllocIDsFromAllocationListStubs(stubs)

	_, _, err = jobsAPI.Deregister(job, true, nil)
	require.NoError(t, err, "error deregistering job %q", job)

	for i := range stoppedIDs {
		WaitForAllocStopped(t, nomadClient, stoppedIDs[i])
	}
}
2020-02-04 16:55:50 +00:00
2021-04-14 23:02:42 +00:00
// WaitForAllocsStopped calls WaitForAllocStopped for each allocation ID, in
// order, discarding the returned allocations.
func WaitForAllocsStopped(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for i := range allocIDs {
		WaitForAllocStopped(t, nomadClient, allocIDs[i])
	}
}
2023-02-02 18:59:14 +00:00
func WaitForAllocStopped ( t * testing . T , nomadClient * api . Client , allocID string ) * api . Allocation {
var alloc * api . Allocation
var err error
2020-04-05 18:52:08 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
2023-02-02 18:59:14 +00:00
alloc , _ , err = nomadClient . Allocations ( ) . Info ( allocID , nil )
2020-04-05 18:52:08 +00:00
if err != nil {
return false , err
}
switch alloc . ClientStatus {
case structs . AllocClientStatusComplete :
return true , nil
case structs . AllocClientStatusFailed :
return true , nil
case structs . AllocClientStatusLost :
return true , nil
default :
return false , fmt . Errorf ( "expected stopped alloc, but was: %s" ,
alloc . ClientStatus )
}
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2020-04-05 18:52:08 +00:00
} )
2023-02-02 18:59:14 +00:00
return alloc
2020-04-05 18:52:08 +00:00
}
2020-10-09 21:31:38 +00:00
// WaitForAllocStatus polls the allocation until its client status equals
// status, aborting the test with t.Fatalf when the retries run out.
func WaitForAllocStatus(t *testing.T, nomadClient *api.Client, allocID string, status string) {
	matches := func() (bool, error) {
		time.Sleep(time.Millisecond * 100)

		info, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		if info.ClientStatus == status {
			return true, nil
		}
		return false, fmt.Errorf("expected %s alloc, but was: %s", status, info.ClientStatus)
	}

	testutil.WaitForResultRetries(retries, matches, func(err error) {
		t.Fatalf("failed to wait on alloc: %v", err)
	})
}
// WaitForAllocsStatus calls WaitForAllocStatus with the given status for each
// allocation ID, in order.
func WaitForAllocsStatus(t *testing.T, nomadClient *api.Client, allocIDs []string, status string) {
	for i := range allocIDs {
		WaitForAllocStatus(t, nomadClient, allocIDs[i], status)
	}
}
2020-01-28 22:33:59 +00:00
// AllocIDsFromAllocationListStubs returns the ID of every stub in allocs,
// preserving order. The result is always non-nil.
func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
	ids := make([]string, len(allocs))
	for i, stub := range allocs {
		ids[i] = stub.ID
	}
	return ids
}
2019-06-04 18:31:42 +00:00
func DeploymentsForJob ( t * testing . T , nomadClient * api . Client , jobID string ) [ ] * api . Deployment {
2019-06-04 15:25:18 +00:00
ds , _ , err := nomadClient . Deployments ( ) . List ( nil )
2019-06-04 18:31:42 +00:00
require . NoError ( t , err )
2019-06-04 15:25:18 +00:00
out := [ ] * api . Deployment { }
for _ , d := range ds {
if d . JobID == jobID {
out = append ( out , d )
}
}
return out
}
2019-05-20 14:54:28 +00:00
func WaitForDeployment ( t * testing . T , nomadClient * api . Client , deployID string , status string , statusDesc string ) {
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
deploy , _ , err := nomadClient . Deployments ( ) . Info ( deployID , nil )
if err != nil {
return false , err
}
if deploy . Status == status && deploy . StatusDescription == statusDesc {
return true , nil
}
2019-05-20 17:27:28 +00:00
return false , fmt . Errorf ( "expected status %s \"%s\", but got: %s \"%s\"" ,
status ,
statusDesc ,
2019-06-04 15:25:18 +00:00
deploy . Status ,
deploy . StatusDescription ,
2019-05-20 17:27:28 +00:00
)
2019-05-20 14:54:28 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on deployment" )
2019-05-20 14:54:28 +00:00
} )
}
2022-10-14 21:15:07 +00:00
// DumpEvals renders every evaluation of the given job through EvalTemplate
// and returns the combined text. It is meant for use during test development
// or just before a test exits after a failed assertion. Lookup failures and an
// empty evaluation list are reported in the returned string; rendering errors
// are written inline into the output.
func DumpEvals(c *api.Client, jobID string) string {
	evals, _, err := c.Jobs().Evaluations(jobID, nil)
	switch {
	case err != nil:
		return fmt.Sprintf("error retrieving evals for job %q: %s", jobID, err)
	case len(evals) == 0:
		return fmt.Sprintf("no evals found for job %q", jobID)
	}

	var out bytes.Buffer
	total := len(evals)
	for idx, eval := range evals {
		data := map[string]any{
			"Index": idx + 1, // 1-based position for display
			"Total": total,
			"Eval":  eval,
		}
		if renderErr := EvalTemplate.Execute(&out, data); renderErr != nil {
			fmt.Fprintf(&out, "error rendering eval: %s\n", renderErr)
		}
	}

	return out.String()
}
// EvalTemplate is the text/template that DumpEvals executes once per
// evaluation. DumpEvals supplies a map with the keys "Index", "Total" and
// "Eval"; the template reads fields of the evaluation through .Eval.
// template.Must panics at package initialization if the text fails to parse.
var EvalTemplate = template . Must ( template . New ( "dump_eval" ) . Parse (
` { { . Index } } / { { . Total } } Job { { . Eval . JobID } } Eval { { . Eval . ID } }
Type : { { . Eval . Type } }
TriggeredBy : { { . Eval . TriggeredBy } }
2022-10-21 14:53:26 +00:00
{ { - if . Eval . DeploymentID } }
2022-10-14 21:15:07 +00:00
Deployment : { { . Eval . DeploymentID } }
2022-10-21 14:53:26 +00:00
{ { - end } }
Status : { { . Eval . Status } } { { if . Eval . StatusDescription } } ( { { . Eval . StatusDescription } } ) { { end } }
{ { - if . Eval . Wait } }
Wait : { { . Eval . Wait } } <- DEPRECATED
{ { - end } }
{ { - if not . Eval . WaitUntil . IsZero } }
WaitUntil : { { . Eval . WaitUntil } }
{ { - end } }
{ { - if . Eval . NextEval } }
2022-10-14 21:15:07 +00:00
NextEval : { { . Eval . NextEval } }
2022-10-21 14:53:26 +00:00
{ { - end } }
{ { - if . Eval . PreviousEval } }
2022-10-14 21:15:07 +00:00
PrevEval : { { . Eval . PreviousEval } }
2022-10-21 14:53:26 +00:00
{ { - end } }
{ { - if . Eval . BlockedEval } }
2022-10-14 21:15:07 +00:00
BlockedEval : { { . Eval . BlockedEval } }
2022-10-21 14:53:26 +00:00
{ { - end } }
{ { - if . Eval . FailedTGAllocs } }
Failed Allocs :
{ { - end } }
2022-10-14 21:15:07 +00:00
{ { - range $ k , $ v := . Eval . FailedTGAllocs } }
Failed Group : { { $ k } }
NodesEvaluated : { { $ v . NodesEvaluated } }
2022-10-21 14:53:26 +00:00
NodesFiltered : { { $ v . NodesFiltered } }
NodesAvailable : { { range $ dc , $ n := $ v . NodesAvailable } } { { $ dc } } : { { $ n } } { { end } }
NodesExhausted : { { $ v . NodesExhausted } }
ClassFiltered : { { len $ v . ClassFiltered } }
ConstraintFilt : { { len $ v . ConstraintFiltered } }
DimensionExhst : { { range $ d , $ n := $ v . DimensionExhausted } } { { $ d } } : { { $ n } } { { end } }
ResourcesExhst : { { range $ r , $ n := $ v . ResourcesExhausted } } { { $ r } } : { { $ n } } { { end } }
QuotaExhausted : { { range $ i , $ q := $ v . QuotaExhausted } } { { $ q } } { { end } }
CoalescedFail : { { $ v . CoalescedFailures } }
ScoreMetaData : { { len $ v . ScoreMetaData } }
AllocationTime : { { $ v . AllocationTime } }
2022-10-14 21:15:07 +00:00
{ { - end } }
2022-10-21 14:53:26 +00:00
{ { - if . Eval . QueuedAllocations } }
2022-10-14 21:15:07 +00:00
QueuedAllocs : { { range $ k , $ n := . Eval . QueuedAllocations } } { { $ k } } : { { $ n } } { { end } }
2022-10-21 14:53:26 +00:00
{ { - end } }
2022-10-14 21:15:07 +00:00
SnapshotIdx : { { . Eval . SnapshotIndex } }
CreateIndex : { { . Eval . CreateIndex } }
ModifyIndex : { { . Eval . ModifyIndex } }
` ) )