Fixing flaky TestOverlap test (#14780)

* test: ensure feasible node selected in overlap test

* test: warn when getting close to retry limit
This commit is contained in:
Michael Schurter 2022-10-03 14:35:02 -07:00 committed by GitHub
parent 0a80a58394
commit ed3218c3dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 8 deletions

View File

@@ -20,7 +20,7 @@ func TestOverlap(t *testing.T) {
nomadClient := e2eutil.NomadClient(t) nomadClient := e2eutil.NomadClient(t)
e2eutil.WaitForLeader(t, nomadClient) e2eutil.WaitForLeader(t, nomadClient)
// Wait for at least 1 node to be ready and get its ID // Wait for at least 1 feasible node to be ready and get its ID
var node *api.Node var node *api.Node
testutil.Wait(t, func() (bool, error) { testutil.Wait(t, func() (bool, error) {
nodesList, _, err := nomadClient.Nodes().List(nil) nodesList, _, err := nomadClient.Nodes().List(nil)
@@ -29,11 +29,21 @@ func TestOverlap(t *testing.T) {
} }
for _, n := range nodesList { for _, n := range nodesList {
if n.Status == "ready" { if n.Status != "ready" {
node, _, err = nomadClient.Nodes().Info(n.ID, nil) continue
must.NoError(t, err)
return true, nil
} }
if n.SchedulingEligibility != "eligible" {
continue
}
node, _, err = nomadClient.Nodes().Info(n.ID, nil)
must.NoError(t, err)
if node.Attributes["kernel.name"] != "linux" {
continue
}
return true, nil
} }
return false, fmt.Errorf("no nodes ready before timeout; need at least 1 ready") return false, fmt.Errorf("no nodes ready before timeout; need at least 1 ready")

View File

@@ -3,6 +3,7 @@ package testutil
import ( import (
"fmt" "fmt"
"os" "os"
"runtime"
"testing" "testing"
"time" "time"
@@ -17,22 +18,29 @@ type errorFn func(error)
func Wait(t *testing.T, test testFn) { func Wait(t *testing.T, test testFn) {
t.Helper() t.Helper()
retries := 500 * TestMultiplier() retries := 500 * TestMultiplier()
for retries > 0 { warn := int64(float64(retries) * 0.75)
for tries := retries; tries > 0; {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
retries-- tries--
success, err := test() success, err := test()
if success { if success {
return return
} }
if retries == 0 { switch tries {
case 0:
if err == nil { if err == nil {
t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)") t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)")
} else { } else {
t.Fatalf("timeout: %v", err) t.Fatalf("timeout: %v", err)
} }
case warn:
pc, _, _, _ := runtime.Caller(1)
f := runtime.FuncForPC(pc)
t.Logf("%d/%d retries reached for %s (err=%v)", warn, retries, f.Name(), err)
} }
} }
} }