f8884d8b52
Fixes #16517. Given a 3 Server cluster with at least 1 Client connected to Follower 1: if a NodeMeta.{Apply,Read} request for the Client is received by Follower 1 with `AllowStale = false`, the Follower will forward the request to the Leader. The Leader, not being connected to the target Client, will forward the RPC back to Follower 1. Follower 1, seeing `AllowStale = false`, will forward the request to the Leader. The Leader, not being connected to... well, hopefully you get the picture: an infinite loop occurs.
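To make the loop concrete, here is a minimal, hypothetical Go sketch of the kind of guard that breaks such a forwarding cycle: a request is forwarded at most once. The `nodeRequest`, `server`, and `ForwardedOnce` names are illustrative assumptions, not Nomad's real RPC types and not the actual fix in this change.

// Hypothetical illustration only; these are not Nomad's real types.
type nodeRequest struct {
	AllowStale    bool
	ForwardedOnce bool // set the first time any server forwards this request
}

type server struct {
	isLeader    bool                         // is this server the Raft leader?
	hasClient   bool                         // is the target client connected to this server?
	forwardRPC  func(req *nodeRequest) error // send the request to another server
	handleLocal func(req *nodeRequest) error // serve the request from this server
}

// nodeMetaApply applies the two forwarding rules described in the commit
// message plus a guard: a follower forwards non-stale requests to the leader,
// and the leader forwards to the server that owns the client connection, but
// never more than once per request, so the ping-pong cannot repeat forever.
func (s *server) nodeMetaApply(req *nodeRequest) error {
	wantsForward := (!s.isLeader && !req.AllowStale) || (s.isLeader && !s.hasClient)
	if wantsForward && !req.ForwardedOnce {
		req.ForwardedOnce = true
		return s.forwardRPC(req)
	}
	return s.handleLocal(req)
}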
384 lines
10 KiB
Go
package testutil

import (
	"fmt"
	"os"
	"runtime"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/shoenig/test/must"
	"github.com/shoenig/test/wait"
)

type testFn func() (bool, error)
type errorFn func(error)

// Wait retries the test function every 10ms until it succeeds or the retry
// budget is exhausted, failing the test on timeout.
func Wait(t *testing.T, test testFn) {
	t.Helper()
	retries := 500 * TestMultiplier()
	warn := int64(float64(retries) * 0.75)
	for tries := retries; tries > 0; {
		time.Sleep(10 * time.Millisecond)
		tries--

		success, err := test()
		if success {
			return
		}

		switch tries {
		case 0:
			if err == nil {
				t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)")
			} else {
				t.Fatalf("timeout: %v", err)
			}
		case warn:
			pc, _, _, _ := runtime.Caller(1)
			f := runtime.FuncForPC(pc)
			t.Logf("%d/%d retries reached for %s (err=%v)", warn, retries, f.Name(), err)
		}
	}
}

// WaitForResult retries the test function with the default retry budget and
// calls error if it never succeeds.
func WaitForResult(test testFn, error errorFn) {
	WaitForResultRetries(500*TestMultiplier(), test, error)
}

// WaitForResultRetries retries the test function every 10ms up to the given
// number of retries and calls error if it never succeeds.
func WaitForResultRetries(retries int64, test testFn, error errorFn) {
	for retries > 0 {
		time.Sleep(10 * time.Millisecond)
		retries--

		success, err := test()
		if success {
			return
		}

		if retries == 0 {
			error(err)
		}
	}
}

// WaitForResultUntil waits the duration for the test to pass.
// Otherwise errorFunc is called after the deadline expires.
func WaitForResultUntil(until time.Duration, test testFn, errorFunc errorFn) {
	var success bool
	var err error
	deadline := time.Now().Add(until)
	for time.Now().Before(deadline) {
		success, err = test()
		if success {
			return
		}
		// Sleep some arbitrary fraction of the deadline
		time.Sleep(until / 30)
	}
	errorFunc(err)
}

// AssertUntil asserts the test function passes throughout the given duration.
// Otherwise error is called on failure.
func AssertUntil(until time.Duration, test testFn, error errorFn) {
	deadline := time.Now().Add(until)
	for time.Now().Before(deadline) {
		success, err := test()
		if !success {
			error(err)
			return
		}
		// Sleep some arbitrary fraction of the deadline
		time.Sleep(until / 30)
	}
}

// TestMultiplier returns a multiplier for retries and waits given the
// environment the tests are being run under.
func TestMultiplier() int64 {
	if IsCI() {
		return 4
	}

	return 1
}

// Timeout takes the desired timeout and increases it if running in CI.
func Timeout(original time.Duration) time.Duration {
	return original * time.Duration(TestMultiplier())
}

// IsCI reports whether the tests are running in a CI environment.
func IsCI() bool {
	_, ok := os.LookupEnv("CI")
	return ok
}

// IsTravis reports whether the tests are running in Travis CI.
func IsTravis() bool {
	_, ok := os.LookupEnv("TRAVIS")
	return ok
}

// IsAppVeyor reports whether the tests are running in AppVeyor.
func IsAppVeyor() bool {
	_, ok := os.LookupEnv("APPVEYOR")
	return ok
}

type rpcFn func(string, interface{}, interface{}) error

// WaitForLeader blocks until a leader is elected.
func WaitForLeader(t testing.TB, rpc rpcFn) {
	t.Helper()
	WaitForResult(func() (bool, error) {
		args := &structs.GenericRequest{}
		var leader string
		err := rpc("Status.Leader", args, &leader)
		return leader != "", err
	}, func(err error) {
		t.Fatalf("failed to find leader: %v", err)
	})
}

// WaitForLeaders blocks until each rpc knows the leader.
func WaitForLeaders(t testing.TB, rpcs ...rpcFn) string {
	t.Helper()

	var leader string
	for i := 0; i < len(rpcs); i++ {
		ok := func() (bool, error) {
			leader = ""
			args := &structs.GenericRequest{}
			err := rpcs[i]("Status.Leader", args, &leader)
			return leader != "", err
		}
		must.Wait(t, wait.InitialSuccess(
			wait.TestFunc(ok),
			wait.Timeout(10*time.Second),
			wait.Gap(1*time.Second),
		))
	}

	return leader
}

// WaitForClient blocks until the client can be found
func WaitForClient(t testing.TB, rpc rpcFn, nodeID string, region string) {
	t.Helper()
	WaitForClientStatus(t, rpc, nodeID, region, structs.NodeStatusReady)
}

// WaitForClientStatus blocks until the client is in the expected status.
func WaitForClientStatus(t testing.TB, rpc rpcFn, nodeID string, region string, status string) {
	t.Helper()

	if region == "" {
		region = "global"
	}
	WaitForResult(func() (bool, error) {
		req := structs.NodeSpecificRequest{
			NodeID:       nodeID,
			QueryOptions: structs.QueryOptions{Region: region},
		}
		var out structs.SingleNodeResponse

		err := rpc("Node.GetNode", &req, &out)
		if err != nil {
			return false, err
		}
		if out.Node == nil {
			return false, fmt.Errorf("node not found")
		}
		if out.Node.Status != status {
			return false, fmt.Errorf("node is %s, not %s", out.Node.Status, status)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("failed to wait for node status: %v", err)
	})

	t.Logf("[TEST] Client for test %s %s, id: %s, region: %s", t.Name(), status, nodeID, region)
}

// WaitForVotingMembers blocks until autopilot promotes all server peers
// to be voting members.
//
// Useful for tests that change cluster topology (e.g. kill a node)
// and should wait until the cluster is stable.
func WaitForVotingMembers(t testing.TB, rpc rpcFn, nPeers int) {
	WaitForResult(func() (bool, error) {
		args := &structs.GenericRequest{}
		args.AllowStale = true
		args.Region = "global"
		args.Namespace = structs.DefaultNamespace
		resp := structs.RaftConfigurationResponse{}
		err := rpc("Operator.RaftGetConfiguration", args, &resp)
		if err != nil {
			return false, fmt.Errorf("failed to query raft: %v", err)
		}

		if len(resp.Servers) != nPeers {
			return false, fmt.Errorf("expected %d peers found %d", nPeers, len(resp.Servers))
		}

		for _, s := range resp.Servers {
			if !s.Voter {
				return false, fmt.Errorf("found nonvoting server: %v", s)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("failed to wait until voting members: %v", err)
	})
}

// RegisterJobWithToken registers a job and uses the job's Region and Namespace.
func RegisterJobWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) {
	WaitForResult(func() (bool, error) {
		args := &structs.JobRegisterRequest{}
		args.Job = job
		args.WriteRequest.Region = job.Region
		args.AuthToken = token
		args.Namespace = job.Namespace
		var jobResp structs.JobRegisterResponse
		err := rpc("Job.Register", args, &jobResp)
		return err == nil, fmt.Errorf("Job.Register error: %v", err)
	}, func(err error) {
		t.Fatalf("error registering job: %v", err)
	})

	t.Logf("Job %q registered", job.ID)
}

// RegisterJob registers a job without an auth token.
func RegisterJob(t testing.TB, rpc rpcFn, job *structs.Job) {
	RegisterJobWithToken(t, rpc, job, "")
}

// WaitForRunningWithToken registers the job with the given token and blocks
// until all of its allocations are out of pending.
func WaitForRunningWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) []*structs.AllocListStub {
	RegisterJobWithToken(t, rpc, job, token)

	var resp structs.JobAllocationsResponse

	// This can be quite slow if the job has expensive setup such as
	// downloading large artifacts or creating a chroot.
	WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) {
		args := &structs.JobSpecificRequest{}
		args.JobID = job.ID
		args.QueryOptions.Region = job.Region
		args.AuthToken = token
		args.Namespace = job.Namespace
		err := rpc("Job.Allocations", args, &resp)
		if err != nil {
			return false, fmt.Errorf("Job.Allocations error: %v", err)
		}

		if len(resp.Allocations) == 0 {
			evals := structs.JobEvaluationsResponse{}
			must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals"))
			return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations))
		}

		for _, alloc := range resp.Allocations {
			if alloc.ClientStatus == structs.AllocClientStatusPending {
				return false, fmt.Errorf("alloc not running: id=%v tg=%v status=%v",
					alloc.ID, alloc.TaskGroup, alloc.ClientStatus)
			}
		}

		return true, nil
	}, func(err error) {
		must.NoError(t, err)
	})

	return resp.Allocations
}

// WaitForRunning runs a job and blocks until all allocs are out of pending.
func WaitForRunning(t testing.TB, rpc rpcFn, job *structs.Job) []*structs.AllocListStub {
	return WaitForRunningWithToken(t, rpc, job, "")
}

// WaitForJobAllocStatus blocks until the ClientStatus of allocations for a job
// matches the expected map of <ClientStatus>: <count>.
func WaitForJobAllocStatus(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int) {
	t.Helper()
	WaitForJobAllocStatusWithToken(t, rpc, job, allocStatus, "")
}

// WaitForJobAllocStatusWithToken behaves the same way as WaitForJobAllocStatus
// but is used for clusters with ACL enabled.
func WaitForJobAllocStatusWithToken(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int, token string) []*structs.AllocListStub {
	t.Helper()

	var allocs []*structs.AllocListStub
	WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) {
		args := &structs.JobSpecificRequest{
			JobID: job.ID,
			QueryOptions: structs.QueryOptions{
				AuthToken: token,
				Namespace: job.Namespace,
				Region:    job.Region,
			},
		}

		var resp structs.JobAllocationsResponse
		err := rpc("Job.Allocations", args, &resp)
		if err != nil {
			return false, fmt.Errorf("Job.Allocations error: %v", err)
		}

		if len(resp.Allocations) == 0 {
			evals := structs.JobEvaluationsResponse{}
			must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals"))
			return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations))
		}

		allocs = resp.Allocations

		got := map[string]int{}
		for _, alloc := range resp.Allocations {
			got[alloc.ClientStatus]++
		}
		if diff := cmp.Diff(allocStatus, got); diff != "" {
			return false, fmt.Errorf("alloc status mismatch (-want +got):\n%s", diff)
		}
		return true, nil
	}, func(err error) {
		must.NoError(t, err)
	})

	return allocs
}

// WaitForFiles blocks until all the files in the slice are present
func WaitForFiles(t testing.TB, files []string) {
	WaitForResult(func() (bool, error) {
		return FilesExist(files)
	}, func(err error) {
		t.Fatalf("missing expected files: %v", err)
	})
}

// WaitForFilesUntil blocks until duration or all the files in the slice are present
func WaitForFilesUntil(t testing.TB, files []string, until time.Duration) {
	WaitForResultUntil(until, func() (bool, error) {
		return FilesExist(files)
	}, func(err error) {
		t.Fatalf("missing expected files: %v", err)
	})
}

// FilesExist verifies all files in the slice are present
func FilesExist(files []string) (bool, error) {
	for _, f := range files {
		if _, err := os.Stat(f); os.IsNotExist(err) {
			return false, fmt.Errorf("expected file not found: %v", f)
		}
	}
	return true, nil
}
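For context on how these helpers are typically consumed, here is a small, hypothetical test sketched against the signatures above; `somethingEventuallyTrue` is a made-up condition, and the import path assumes the package lives at github.com/hashicorp/nomad/testutil.

package example

import (
	"fmt"
	"testing"

	"github.com/hashicorp/nomad/testutil"
)

// somethingEventuallyTrue stands in for whatever condition a test polls for.
func somethingEventuallyTrue() bool { return true }

func TestEventually(t *testing.T) {
	// Poll the condition every 10ms until it holds or the retry budget runs out.
	testutil.WaitForResult(func() (bool, error) {
		if !somethingEventuallyTrue() {
			return false, fmt.Errorf("condition not yet true")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("timed out waiting for condition: %v", err)
	})
}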