2018-12-20 03:25:32 +00:00
package e2eutil
import (
2019-01-08 22:37:08 +00:00
"fmt"
2021-01-13 20:00:40 +00:00
"os"
2020-06-19 18:03:10 +00:00
"strings"
2019-01-04 13:53:50 +00:00
"testing"
"time"
2020-06-19 18:03:10 +00:00
consulapi "github.com/hashicorp/consul/api"
2018-12-20 03:25:32 +00:00
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/helper"
2021-01-13 20:00:40 +00:00
"github.com/hashicorp/nomad/jobspec2"
2019-01-18 04:32:45 +00:00
"github.com/hashicorp/nomad/nomad/structs"
2018-12-20 03:25:32 +00:00
"github.com/hashicorp/nomad/testutil"
2019-01-26 00:51:20 +00:00
"github.com/kr/pretty"
2018-12-20 03:25:32 +00:00
"github.com/stretchr/testify/require"
)
// retries is the retry count passed to testutil.WaitForResultRetries by the
// wait helpers in this file (leader election, node readiness, allocation
// status, and deployment status).
const retries = 500
func WaitForLeader ( t * testing . T , nomadClient * api . Client ) {
statusAPI := nomadClient . Status ( )
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
leader , err := statusAPI . Leader ( )
return leader != "" , err
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to find leader" )
2018-12-20 03:25:32 +00:00
} )
}
2019-01-08 22:37:08 +00:00
// WaitForNodesReady waits until at least `nodes` number of nodes are ready or
// fails the test.
2019-01-03 22:16:20 +00:00
func WaitForNodesReady ( t * testing . T , nomadClient * api . Client , nodes int ) {
nodesAPI := nomadClient . Nodes ( )
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
2019-01-18 04:32:45 +00:00
defer time . Sleep ( time . Millisecond * 100 )
2019-01-03 22:16:20 +00:00
nodesList , _ , err := nodesAPI . List ( nil )
2019-01-08 22:37:08 +00:00
if err != nil {
return false , fmt . Errorf ( "error listing nodes: %v" , err )
}
2019-01-03 22:16:20 +00:00
eligibleNodes := 0
for _ , node := range nodesList {
if node . Status == "ready" {
eligibleNodes ++
}
}
2019-01-08 22:37:08 +00:00
return eligibleNodes >= nodes , fmt . Errorf ( "only %d nodes ready (wanted at least %d)" , eligibleNodes , nodes )
2019-01-03 22:16:20 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to get enough ready nodes" )
2019-01-03 22:16:20 +00:00
} )
}
2020-01-28 22:33:59 +00:00
// stringToPtrOrNil returns nil for the empty string and otherwise the result
// of helper.StringToPtr(s).
func stringToPtrOrNil(s string) *string {
	if s != "" {
		return helper.StringToPtr(s)
	}
	return nil
}
2021-01-13 20:00:40 +00:00
// Parse2 opens jobFile and parses it with jobspec2.Parse, returning the
// parsed job. The test fails immediately if the file cannot be opened; a
// parse error is returned to the caller instead.
func Parse2(t *testing.T, jobFile string) (*api.Job, error) {
	f, err := os.Open(jobFile)
	require.NoError(t, err)
	// Close the handle once parsing is done so repeated calls in a test run
	// do not leak file descriptors. The file is only read, so the Close error
	// carries no useful information and is ignored.
	defer f.Close()

	return jobspec2.Parse(jobFile, f)
}
2020-01-28 22:33:59 +00:00
func RegisterAllocs ( t * testing . T , nomadClient * api . Client , jobFile , jobID , cToken string ) [ ] * api . AllocationListStub {
2018-12-20 03:25:32 +00:00
// Parse job
2021-01-13 20:00:40 +00:00
job , err := Parse2 ( t , jobFile )
2020-04-03 14:22:22 +00:00
require . NoError ( t , err )
2020-01-28 22:33:59 +00:00
// Set custom job ID (distinguish among tests)
2019-01-03 22:16:20 +00:00
job . ID = helper . StringToPtr ( jobID )
2018-12-20 03:25:32 +00:00
2020-01-28 22:33:59 +00:00
// Set a Consul "operator" token for the job, if provided.
job . ConsulToken = stringToPtrOrNil ( cToken )
2018-12-20 03:25:32 +00:00
// Register job
2019-09-10 17:45:16 +00:00
var idx uint64
2018-12-20 03:25:32 +00:00
jobs := nomadClient . Jobs ( )
2019-01-26 00:51:20 +00:00
testutil . WaitForResult ( func ( ) ( bool , error ) {
2019-09-10 17:45:16 +00:00
resp , meta , err := jobs . Register ( job , nil )
2019-01-26 00:51:20 +00:00
if err != nil {
return false , err
}
2019-09-10 17:45:16 +00:00
idx = meta . LastIndex
2019-01-26 00:51:20 +00:00
return resp . EvalID != "" , fmt . Errorf ( "expected EvalID:%s" , pretty . Sprint ( resp ) )
} , func ( err error ) {
2020-04-03 14:22:22 +00:00
require . NoError ( t , err )
2019-01-26 00:51:20 +00:00
} )
2018-12-20 03:25:32 +00:00
2020-01-28 22:33:59 +00:00
allocs , _ , err := jobs . Allocations ( jobID , false , & api . QueryOptions { WaitIndex : idx } )
require . NoError ( t , err )
2019-05-20 14:54:28 +00:00
return allocs
}
2020-08-18 22:37:02 +00:00
// RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
// successfully create Allocs.
2020-01-28 22:33:59 +00:00
func RegisterAndWaitForAllocs ( t * testing . T , nomadClient * api . Client , jobFile , jobID , cToken string ) [ ] * api . AllocationListStub {
2019-05-20 14:54:28 +00:00
jobs := nomadClient . Jobs ( )
2019-05-20 19:10:32 +00:00
// Start allocations
2020-01-28 22:33:59 +00:00
RegisterAllocs ( t , nomadClient , jobFile , jobID , cToken )
2020-04-03 19:52:58 +00:00
2020-04-03 14:22:22 +00:00
var err error
2020-04-03 19:52:58 +00:00
allocs := [ ] * api . AllocationListStub { }
evals := [ ] * api . Evaluation { }
2019-05-20 19:10:32 +00:00
2018-12-20 03:25:32 +00:00
// Wrap in retry to wait until placement
2021-01-26 14:24:55 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Second )
2020-04-03 14:22:22 +00:00
allocs , _ , err = jobs . Allocations ( jobID , false , nil )
2021-01-26 14:24:55 +00:00
if len ( allocs ) == 0 {
2020-04-03 19:52:58 +00:00
evals , _ , err = nomadClient . Jobs ( ) . Evaluations ( jobID , nil )
2021-01-26 14:24:55 +00:00
return false , fmt . Errorf ( "no allocations for job %v" , jobID )
2020-04-03 19:52:58 +00:00
}
2020-04-03 14:22:22 +00:00
2021-01-26 14:24:55 +00:00
return true , nil
} , func ( e error ) {
msg := fmt . Sprintf ( "allocations not placed for %s" , jobID )
2020-04-03 19:52:58 +00:00
for _ , eval := range evals {
msg += fmt . Sprintf ( "\n %s - %s" , eval . Status , eval . StatusDescription )
}
2021-01-26 14:24:55 +00:00
require . Fail ( t , msg , "full evals: %v" , pretty . Sprint ( evals ) )
} )
2020-04-03 19:52:58 +00:00
require . NoError ( t , err ) // we only care about the last error
2021-01-26 14:24:55 +00:00
2019-01-03 22:16:20 +00:00
return allocs
2018-12-20 03:25:32 +00:00
}
2019-01-18 04:32:45 +00:00
func WaitForAllocRunning ( t * testing . T , nomadClient * api . Client , allocID string ) {
2021-01-26 14:24:55 +00:00
t . Helper ( )
2019-01-18 04:32:45 +00:00
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
2019-01-23 19:09:49 +00:00
time . Sleep ( time . Millisecond * 100 )
2019-01-18 04:32:45 +00:00
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
2021-01-26 14:24:55 +00:00
return alloc . ClientStatus == structs . AllocClientStatusRunning , fmt . Errorf ( "expected status running, but was: %s\n%v" , alloc . ClientStatus , pretty . Sprint ( alloc ) )
2019-01-18 04:32:45 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2019-01-18 04:32:45 +00:00
} )
}
2019-05-20 14:54:28 +00:00
2021-01-27 14:44:01 +00:00
// WaitForAllocTaskRunning polls the allocation identified by allocID, pausing
// 100ms before each lookup, until the named task's state is
// structs.TaskStateRunning. A task with no entry in the allocation's
// TaskStates is reported as state "n/a". The failure callback calls t.Fatalf
// with the last error.
func WaitForAllocTaskRunning(t *testing.T, nomadClient *api.Client, allocID, task string) {
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)

		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		state := "n/a"
		// taskState is named distinctly so it does not shadow the task parameter.
		if taskState := alloc.TaskStates[task]; taskState != nil {
			state = taskState.State
		}

		// Task states have their own constants; compare against the task-state
		// one rather than the allocation client-status one.
		return state == structs.TaskStateRunning, fmt.Errorf("expected status running, but was: %s", state)
	}, func(err error) {
		t.Fatalf("failed to wait on alloc: %v", err)
	})
}
2020-01-28 22:33:59 +00:00
// WaitForAllocsRunning calls WaitForAllocRunning for each ID in allocIDs, in
// order.
func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for i := range allocIDs {
		WaitForAllocRunning(t, nomadClient, allocIDs[i])
	}
}
2020-02-04 16:55:50 +00:00
// WaitForAllocsNotPending calls WaitForAllocNotPending for each ID in
// allocIDs, in order.
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for i := range allocIDs {
		WaitForAllocNotPending(t, nomadClient, allocIDs[i])
	}
}
func WaitForAllocNotPending ( t * testing . T , nomadClient * api . Client , allocID string ) {
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
2020-02-04 18:59:39 +00:00
return alloc . ClientStatus != structs . AllocClientStatusPending , fmt . Errorf ( "expected status not pending, but was: %s" , alloc . ClientStatus )
2020-02-04 16:55:50 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2020-02-04 16:55:50 +00:00
} )
}
2020-08-18 22:37:02 +00:00
// WaitForJobStopped deregisters a job and waits for its allocations to stop.
// The allocation IDs are collected before the Deregister call, and
// WaitForAllocStopped is then called for each of them in turn.
func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
	jobsAPI := nomadClient.Jobs()

	stubs, _, err := jobsAPI.Allocations(job, true, nil)
	require.NoError(t, err, "error getting allocations for job %q", job)
	allocIDs := AllocIDsFromAllocationListStubs(stubs)

	_, _, err = jobsAPI.Deregister(job, true, nil)
	require.NoError(t, err, "error deregistering job %q", job)

	for i := range allocIDs {
		WaitForAllocStopped(t, nomadClient, allocIDs[i])
	}
}
2020-02-04 16:55:50 +00:00
2020-04-05 18:52:08 +00:00
func WaitForAllocStopped ( t * testing . T , nomadClient * api . Client , allocID string ) {
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
alloc , _ , err := nomadClient . Allocations ( ) . Info ( allocID , nil )
if err != nil {
return false , err
}
switch alloc . ClientStatus {
case structs . AllocClientStatusComplete :
return true , nil
case structs . AllocClientStatusFailed :
return true , nil
case structs . AllocClientStatusLost :
return true , nil
default :
return false , fmt . Errorf ( "expected stopped alloc, but was: %s" ,
alloc . ClientStatus )
}
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on alloc" )
2020-04-05 18:52:08 +00:00
} )
}
2020-01-28 22:33:59 +00:00
// AllocIDsFromAllocationListStubs returns the ID of every stub in allocs, in
// the same order. The result is a non-nil slice even when allocs is empty.
func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
	ids := make([]string, len(allocs))
	for i, stub := range allocs {
		ids[i] = stub.ID
	}
	return ids
}
2019-06-04 18:31:42 +00:00
func DeploymentsForJob ( t * testing . T , nomadClient * api . Client , jobID string ) [ ] * api . Deployment {
2019-06-04 15:25:18 +00:00
ds , _ , err := nomadClient . Deployments ( ) . List ( nil )
2019-06-04 18:31:42 +00:00
require . NoError ( t , err )
2019-06-04 15:25:18 +00:00
out := [ ] * api . Deployment { }
for _ , d := range ds {
if d . JobID == jobID {
out = append ( out , d )
}
}
return out
}
2019-05-20 14:54:28 +00:00
func WaitForDeployment ( t * testing . T , nomadClient * api . Client , deployID string , status string , statusDesc string ) {
testutil . WaitForResultRetries ( retries , func ( ) ( bool , error ) {
time . Sleep ( time . Millisecond * 100 )
deploy , _ , err := nomadClient . Deployments ( ) . Info ( deployID , nil )
if err != nil {
return false , err
}
if deploy . Status == status && deploy . StatusDescription == statusDesc {
return true , nil
}
2019-05-20 17:27:28 +00:00
return false , fmt . Errorf ( "expected status %s \"%s\", but got: %s \"%s\"" ,
status ,
statusDesc ,
2019-06-04 15:25:18 +00:00
deploy . Status ,
deploy . StatusDescription ,
2019-05-20 17:27:28 +00:00
)
2019-05-20 14:54:28 +00:00
} , func ( err error ) {
2021-01-26 14:24:55 +00:00
require . NoError ( t , err , "failed to wait on deployment" )
2019-05-20 14:54:28 +00:00
} )
}
2020-06-19 18:03:10 +00:00
// CheckServicesPassing scans for passing agent checks via the given agent API
// client.
//
// It fetches the agent's checks up to 60 times, one second apart, considering
// only checks whose ServiceID contains one of allocIDs. It returns as soon as
// all of those checks have status "passing"; otherwise the test fails with a
// dump of every check that was still not passing on the last attempt.
//
// Deprecated: not useful in e2e, where more than one node exists and Nomad jobs
// are placed non-deterministically. The Consul agentAPI only knows about what
// is registered on its node, and cannot be used to query for cluster wide state.
func CheckServicesPassing(t *testing.T, agentAPI *consulapi.Agent, allocIDs []string) {
	// belongsToAllocs reports whether a check's service ID mentions any of the
	// allocations of interest.
	belongsToAllocs := func(serviceID string) bool {
		for _, allocID := range allocIDs {
			if strings.Contains(serviceID, allocID) {
				return true
			}
		}
		return false
	}

	failing := map[string]*consulapi.AgentCheck{}
	for i := 0; i < 60; i++ {
		checks, err := agentAPI.Checks()
		require.NoError(t, err)

		// Collect every relevant check that is not passing, not just the
		// first, so the progress log and the final failure output show the
		// full picture.
		failing = map[string]*consulapi.AgentCheck{}
		for _, check := range checks {
			// Skip checks for other services.
			if !belongsToAllocs(check.ServiceID) {
				continue
			}
			if check.Status != "passing" {
				failing[check.CheckID] = check
			}
		}

		if len(failing) == 0 {
			break
		}

		t.Logf("still %d checks not passing", len(failing))
		time.Sleep(time.Second)
	}

	require.Len(t, failing, 0, pretty.Sprint(failing))
}